File: | pixman/pixman-sse2.c |
Location: | line 5808, column 1 |
Description: | Assigned value is garbage or undefined |
1 | /* | ||||||
2 | * Copyright © 2008 Rodrigo Kumpera | ||||||
3 | * Copyright © 2008 André Tupinambá | ||||||
4 | * | ||||||
5 | * Permission to use, copy, modify, distribute, and sell this software and its | ||||||
6 | * documentation for any purpose is hereby granted without fee, provided that | ||||||
7 | * the above copyright notice appear in all copies and that both that | ||||||
8 | * copyright notice and this permission notice appear in supporting | ||||||
9 | * documentation, and that the name of Red Hat not be used in advertising or | ||||||
10 | * publicity pertaining to distribution of the software without specific, | ||||||
11 | * written prior permission. Red Hat makes no representations about the | ||||||
12 | * suitability of this software for any purpose. It is provided "as is" | ||||||
13 | * without express or implied warranty. | ||||||
14 | * | ||||||
15 | * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS | ||||||
16 | * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND | ||||||
17 | * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||||||
18 | * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||||||
19 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN | ||||||
20 | * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING | ||||||
21 | * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | ||||||
22 | * SOFTWARE. | ||||||
23 | * | ||||||
24 | * Author: Rodrigo Kumpera (kumpera@gmail.com) | ||||||
25 | * André Tupinambá (andrelrt@gmail.com) | ||||||
26 | * | ||||||
27 | * Based on work by Owen Taylor and Søren Sandmann | ||||||
28 | */ | ||||||
29 | #ifdef HAVE_CONFIG_H1 | ||||||
30 | #include <config.h> | ||||||
31 | #endif | ||||||
32 | |||||||
33 | /* PSHUFD is slow on a lot of old processors, and new processors have SSSE3 */ | ||||||
34 | #define PSHUFD_IS_FAST0 0 | ||||||
35 | |||||||
36 | #include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */ | ||||||
37 | #include <emmintrin.h> /* for SSE2 intrinsics */ | ||||||
38 | #include "pixman-private.h" | ||||||
39 | #include "pixman-combine32.h" | ||||||
40 | #include "pixman-inlines.h" | ||||||
41 | |||||||
/* File-scope SSE2 constant vectors shared by the helpers below.
 * Their values are assigned outside this section of the file.
 */

/* Arithmetic helpers: mask_0080 and mask_0101 are used by the pixel
 * multiply helpers, mask_00ff by the negate helpers, and mask_alpha is
 * ORed into the expanded alpha in the over_rev_non_pre helpers.
 */
static __m128i mask_0080;
static __m128i mask_00ff;
static __m128i mask_0101;
static __m128i mask_ffff;
static __m128i mask_ff000000;
static __m128i mask_alpha;

/* Field masks used when packing to / unpacking from r5g6b5. */
static __m128i mask_565_r;
static __m128i mask_565_g1;
static __m128i mask_565_g2;
static __m128i mask_565_b;
static __m128i mask_red;
static __m128i mask_green;
static __m128i mask_blue;

/* Bits that unpack_565_to_8888 shifts down and ORs back into a channel. */
static __m128i mask_565_fix_rb;
static __m128i mask_565_fix_g;

/* Operands of the multiply-add based 565 packer. */
static __m128i mask_565_rb;
static __m128i mask_565_pack_multiplier;
61 | |||||||
62 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
63 | unpack_32_1x128 (uint32_t data) | ||||||
64 | { | ||||||
65 | return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ()); | ||||||
66 | } | ||||||
67 | |||||||
68 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
69 | unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi) | ||||||
70 | { | ||||||
71 | *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ()); | ||||||
72 | *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ()); | ||||||
73 | } | ||||||
74 | |||||||
75 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
76 | unpack_565_to_8888 (__m128i lo) | ||||||
77 | { | ||||||
78 | __m128i r, g, b, rb, t; | ||||||
79 | |||||||
80 | r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red); | ||||||
81 | g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green); | ||||||
82 | b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue); | ||||||
83 | |||||||
84 | rb = _mm_or_si128 (r, b); | ||||||
85 | t = _mm_and_si128 (rb, mask_565_fix_rb); | ||||||
86 | t = _mm_srli_epi32 (t, 5); | ||||||
87 | rb = _mm_or_si128 (rb, t); | ||||||
88 | |||||||
89 | t = _mm_and_si128 (g, mask_565_fix_g); | ||||||
90 | t = _mm_srli_epi32 (t, 6); | ||||||
91 | g = _mm_or_si128 (g, t); | ||||||
92 | |||||||
93 | return _mm_or_si128 (rb, g); | ||||||
94 | } | ||||||
95 | |||||||
96 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
97 | unpack_565_128_4x128 (__m128i data, | ||||||
98 | __m128i* data0, | ||||||
99 | __m128i* data1, | ||||||
100 | __m128i* data2, | ||||||
101 | __m128i* data3) | ||||||
102 | { | ||||||
103 | __m128i lo, hi; | ||||||
104 | |||||||
105 | lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ()); | ||||||
106 | hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ()); | ||||||
107 | |||||||
108 | lo = unpack_565_to_8888 (lo); | ||||||
109 | hi = unpack_565_to_8888 (hi); | ||||||
110 | |||||||
111 | unpack_128_2x128 (lo, data0, data1); | ||||||
112 | unpack_128_2x128 (hi, data2, data3); | ||||||
113 | } | ||||||
114 | |||||||
115 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint16_t | ||||||
116 | pack_565_32_16 (uint32_t pixel) | ||||||
117 | { | ||||||
118 | return (uint16_t) (((pixel >> 8) & 0xf800) | | ||||||
119 | ((pixel >> 5) & 0x07e0) | | ||||||
120 | ((pixel >> 3) & 0x001f)); | ||||||
121 | } | ||||||
122 | |||||||
123 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
124 | pack_2x128_128 (__m128i lo, __m128i hi) | ||||||
125 | { | ||||||
126 | return _mm_packus_epi16 (lo, hi); | ||||||
127 | } | ||||||
128 | |||||||
129 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
130 | pack_565_2packedx128_128 (__m128i lo, __m128i hi) | ||||||
131 | { | ||||||
132 | __m128i rb0 = _mm_and_si128 (lo, mask_565_rb); | ||||||
133 | __m128i rb1 = _mm_and_si128 (hi, mask_565_rb); | ||||||
134 | |||||||
135 | __m128i t0 = _mm_madd_epi16 (rb0, mask_565_pack_multiplier); | ||||||
136 | __m128i t1 = _mm_madd_epi16 (rb1, mask_565_pack_multiplier); | ||||||
137 | |||||||
138 | __m128i g0 = _mm_and_si128 (lo, mask_green); | ||||||
139 | __m128i g1 = _mm_and_si128 (hi, mask_green); | ||||||
140 | |||||||
141 | t0 = _mm_or_si128 (t0, g0); | ||||||
142 | t1 = _mm_or_si128 (t1, g1); | ||||||
143 | |||||||
144 | /* Simulates _mm_packus_epi32 */ | ||||||
145 | t0 = _mm_slli_epi32 (t0, 16 - 5); | ||||||
146 | t1 = _mm_slli_epi32 (t1, 16 - 5); | ||||||
147 | t0 = _mm_srai_epi32 (t0, 16); | ||||||
148 | t1 = _mm_srai_epi32 (t1, 16); | ||||||
149 | return _mm_packs_epi32 (t0, t1); | ||||||
150 | } | ||||||
151 | |||||||
152 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
153 | pack_565_2x128_128 (__m128i lo, __m128i hi) | ||||||
154 | { | ||||||
155 | __m128i data; | ||||||
156 | __m128i r, g1, g2, b; | ||||||
157 | |||||||
158 | data = pack_2x128_128 (lo, hi); | ||||||
159 | |||||||
160 | r = _mm_and_si128 (data, mask_565_r); | ||||||
161 | g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1); | ||||||
162 | g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2); | ||||||
163 | b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b); | ||||||
164 | |||||||
165 | return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b); | ||||||
166 | } | ||||||
167 | |||||||
168 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
169 | pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3) | ||||||
170 | { | ||||||
171 | return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1), | ||||||
172 | pack_565_2x128_128 (*xmm2, *xmm3)); | ||||||
173 | } | ||||||
174 | |||||||
175 | static force_inline__inline__ __attribute__ ((__always_inline__)) int | ||||||
176 | is_opaque (__m128i x) | ||||||
177 | { | ||||||
178 | __m128i ffs = _mm_cmpeq_epi8 (x, x); | ||||||
179 | |||||||
180 | return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888; | ||||||
181 | } | ||||||
182 | |||||||
183 | static force_inline__inline__ __attribute__ ((__always_inline__)) int | ||||||
184 | is_zero (__m128i x) | ||||||
185 | { | ||||||
186 | return _mm_movemask_epi8 ( | ||||||
187 | _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff; | ||||||
188 | } | ||||||
189 | |||||||
190 | static force_inline__inline__ __attribute__ ((__always_inline__)) int | ||||||
191 | is_transparent (__m128i x) | ||||||
192 | { | ||||||
193 | return (_mm_movemask_epi8 ( | ||||||
194 | _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888; | ||||||
195 | } | ||||||
196 | |||||||
197 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
198 | expand_pixel_32_1x128 (uint32_t data) | ||||||
199 | { | ||||||
200 | return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0))__extension__ ({ (__m128i)__builtin_shufflevector((__v4si)(__m128i )(unpack_32_1x128 (data)), (__v4si)_mm_setzero_si128(), ((((1 ) << 6) | ((0) << 4) | ((1) << 2) | (0))) & 0x3, (((((1) << 6) | ((0) << 4) | ((1) << 2 ) | (0))) & 0xc) >> 2, (((((1) << 6) | ((0) << 4) | ((1) << 2) | (0))) & 0x30) >> 4, (((((1 ) << 6) | ((0) << 4) | ((1) << 2) | (0))) & 0xc0) >> 6); }); | ||||||
201 | } | ||||||
202 | |||||||
203 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
204 | expand_alpha_1x128 (__m128i data) | ||||||
205 | { | ||||||
206 | return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)( __m128i)(data), (__v8hi)_mm_setzero_si128(), ((((3) << 6 ) | ((3) << 4) | ((3) << 2) | (3))) & 0x3, (( (((3) << 6) | ((3) << 4) | ((3) << 2) | (3) )) & 0xc) >> 2, (((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x30) >> 4, (((((3 ) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0xc0) >> 6, 4, 5, 6, 7); })), (__v8hi)_mm_setzero_si128 (), 0, 1, 2, 3, 4 + ((((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x03) >> 0), 4 + (((((( 3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x0c) >> 2), 4 + ((((((3) << 6) | ((3) << 4 ) | ((3) << 2) | (3))) & 0x30) >> 4), 4 + ((( (((3) << 6) | ((3) << 4) | ((3) << 2) | (3) )) & 0xc0) >> 6)); }) | ||||||
207 | _MM_SHUFFLE (3, 3, 3, 3)),__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)( __m128i)(data), (__v8hi)_mm_setzero_si128(), ((((3) << 6 ) | ((3) << 4) | ((3) << 2) | (3))) & 0x3, (( (((3) << 6) | ((3) << 4) | ((3) << 2) | (3) )) & 0xc) >> 2, (((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x30) >> 4, (((((3 ) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0xc0) >> 6, 4, 5, 6, 7); })), (__v8hi)_mm_setzero_si128 (), 0, 1, 2, 3, 4 + ((((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x03) >> 0), 4 + (((((( 3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x0c) >> 2), 4 + ((((((3) << 6) | ((3) << 4 ) | ((3) << 2) | (3))) & 0x30) >> 4), 4 + ((( (((3) << 6) | ((3) << 4) | ((3) << 2) | (3) )) & 0xc0) >> 6)); }) | ||||||
208 | _MM_SHUFFLE (3, 3, 3, 3))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)( __m128i)(data), (__v8hi)_mm_setzero_si128(), ((((3) << 6 ) | ((3) << 4) | ((3) << 2) | (3))) & 0x3, (( (((3) << 6) | ((3) << 4) | ((3) << 2) | (3) )) & 0xc) >> 2, (((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x30) >> 4, (((((3 ) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0xc0) >> 6, 4, 5, 6, 7); })), (__v8hi)_mm_setzero_si128 (), 0, 1, 2, 3, 4 + ((((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x03) >> 0), 4 + (((((( 3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x0c) >> 2), 4 + ((((((3) << 6) | ((3) << 4 ) | ((3) << 2) | (3))) & 0x30) >> 4), 4 + ((( (((3) << 6) | ((3) << 4) | ((3) << 2) | (3) )) & 0xc0) >> 6)); }); | ||||||
209 | } | ||||||
210 | |||||||
211 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
212 | expand_alpha_2x128 (__m128i data_lo, | ||||||
213 | __m128i data_hi, | ||||||
214 | __m128i* alpha_lo, | ||||||
215 | __m128i* alpha_hi) | ||||||
216 | { | ||||||
217 | __m128i lo, hi; | ||||||
218 | |||||||
219 | lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data_lo), (__v8hi)_mm_setzero_si128(), ((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x3, ((((( 3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0xc) >> 2, (((((3) << 6) | ((3) << 4) | (( 3) << 2) | (3))) & 0x30) >> 4, (((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0xc0) >> 6, 4, 5, 6, 7); }); | ||||||
220 | hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data_hi), (__v8hi)_mm_setzero_si128(), ((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x3, ((((( 3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0xc) >> 2, (((((3) << 6) | ((3) << 4) | (( 3) << 2) | (3))) & 0x30) >> 4, (((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0xc0) >> 6, 4, 5, 6, 7); }); | ||||||
221 | |||||||
222 | *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(lo), (__v8hi)_mm_setzero_si128(), 0, 1, 2, 3, 4 + ((((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x03) >> 0), 4 + ((((((3) << 6) | ((3) << 4) | ( (3) << 2) | (3))) & 0x0c) >> 2), 4 + ((((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x30) >> 4), 4 + ((((((3) << 6) | ((3) << 4 ) | ((3) << 2) | (3))) & 0xc0) >> 6)); }); | ||||||
223 | *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(hi), (__v8hi)_mm_setzero_si128(), 0, 1, 2, 3, 4 + ((((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x03) >> 0), 4 + ((((((3) << 6) | ((3) << 4) | ( (3) << 2) | (3))) & 0x0c) >> 2), 4 + ((((((3) << 6) | ((3) << 4) | ((3) << 2) | (3))) & 0x30) >> 4), 4 + ((((((3) << 6) | ((3) << 4 ) | ((3) << 2) | (3))) & 0xc0) >> 6)); }); | ||||||
224 | } | ||||||
225 | |||||||
226 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
227 | expand_alpha_rev_2x128 (__m128i data_lo, | ||||||
228 | __m128i data_hi, | ||||||
229 | __m128i* alpha_lo, | ||||||
230 | __m128i* alpha_hi) | ||||||
231 | { | ||||||
232 | __m128i lo, hi; | ||||||
233 | |||||||
234 | lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data_lo), (__v8hi)_mm_setzero_si128(), ((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x3, ((((( 0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc) >> 2, (((((0) << 6) | ((0) << 4) | (( 0) << 2) | (0))) & 0x30) >> 4, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc0) >> 6, 4, 5, 6, 7); }); | ||||||
235 | hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data_hi), (__v8hi)_mm_setzero_si128(), ((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x3, ((((( 0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc) >> 2, (((((0) << 6) | ((0) << 4) | (( 0) << 2) | (0))) & 0x30) >> 4, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc0) >> 6, 4, 5, 6, 7); }); | ||||||
236 | *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(lo), (__v8hi)_mm_setzero_si128(), 0, 1, 2, 3, 4 + ((((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x03) >> 0), 4 + ((((((0) << 6) | ((0) << 4) | ( (0) << 2) | (0))) & 0x0c) >> 2), 4 + ((((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x30) >> 4), 4 + ((((((0) << 6) | ((0) << 4 ) | ((0) << 2) | (0))) & 0xc0) >> 6)); }); | ||||||
237 | *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(hi), (__v8hi)_mm_setzero_si128(), 0, 1, 2, 3, 4 + ((((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x03) >> 0), 4 + ((((((0) << 6) | ((0) << 4) | ( (0) << 2) | (0))) & 0x0c) >> 2), 4 + ((((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x30) >> 4), 4 + ((((((0) << 6) | ((0) << 4 ) | ((0) << 2) | (0))) & 0xc0) >> 6)); }); | ||||||
238 | } | ||||||
239 | |||||||
240 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
241 | pix_multiply_2x128 (__m128i* data_lo, | ||||||
242 | __m128i* data_hi, | ||||||
243 | __m128i* alpha_lo, | ||||||
244 | __m128i* alpha_hi, | ||||||
245 | __m128i* ret_lo, | ||||||
246 | __m128i* ret_hi) | ||||||
247 | { | ||||||
248 | __m128i lo, hi; | ||||||
249 | |||||||
250 | lo = _mm_mullo_epi16 (*data_lo, *alpha_lo); | ||||||
251 | hi = _mm_mullo_epi16 (*data_hi, *alpha_hi); | ||||||
252 | lo = _mm_adds_epu16 (lo, mask_0080); | ||||||
253 | hi = _mm_adds_epu16 (hi, mask_0080); | ||||||
254 | *ret_lo = _mm_mulhi_epu16 (lo, mask_0101); | ||||||
255 | *ret_hi = _mm_mulhi_epu16 (hi, mask_0101); | ||||||
256 | } | ||||||
257 | |||||||
258 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
259 | pix_add_multiply_2x128 (__m128i* src_lo, | ||||||
260 | __m128i* src_hi, | ||||||
261 | __m128i* alpha_dst_lo, | ||||||
262 | __m128i* alpha_dst_hi, | ||||||
263 | __m128i* dst_lo, | ||||||
264 | __m128i* dst_hi, | ||||||
265 | __m128i* alpha_src_lo, | ||||||
266 | __m128i* alpha_src_hi, | ||||||
267 | __m128i* ret_lo, | ||||||
268 | __m128i* ret_hi) | ||||||
269 | { | ||||||
270 | __m128i t1_lo, t1_hi; | ||||||
271 | __m128i t2_lo, t2_hi; | ||||||
272 | |||||||
273 | pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi); | ||||||
274 | pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi); | ||||||
275 | |||||||
276 | *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo); | ||||||
277 | *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi); | ||||||
278 | } | ||||||
279 | |||||||
280 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
281 | negate_2x128 (__m128i data_lo, | ||||||
282 | __m128i data_hi, | ||||||
283 | __m128i* neg_lo, | ||||||
284 | __m128i* neg_hi) | ||||||
285 | { | ||||||
286 | *neg_lo = _mm_xor_si128 (data_lo, mask_00ff); | ||||||
287 | *neg_hi = _mm_xor_si128 (data_hi, mask_00ff); | ||||||
288 | } | ||||||
289 | |||||||
290 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
291 | invert_colors_2x128 (__m128i data_lo, | ||||||
292 | __m128i data_hi, | ||||||
293 | __m128i* inv_lo, | ||||||
294 | __m128i* inv_hi) | ||||||
295 | { | ||||||
296 | __m128i lo, hi; | ||||||
297 | |||||||
298 | lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data_lo), (__v8hi)_mm_setzero_si128(), ((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0x3, ((((( 3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0xc) >> 2, (((((3) << 6) | ((0) << 4) | (( 1) << 2) | (2))) & 0x30) >> 4, (((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0xc0) >> 6, 4, 5, 6, 7); }); | ||||||
299 | hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data_hi), (__v8hi)_mm_setzero_si128(), ((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0x3, ((((( 3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0xc) >> 2, (((((3) << 6) | ((0) << 4) | (( 1) << 2) | (2))) & 0x30) >> 4, (((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0xc0) >> 6, 4, 5, 6, 7); }); | ||||||
300 | *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(lo), (__v8hi)_mm_setzero_si128(), 0, 1, 2, 3, 4 + ((((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0x03) >> 0), 4 + ((((((3) << 6) | ((0) << 4) | ( (1) << 2) | (2))) & 0x0c) >> 2), 4 + ((((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0x30) >> 4), 4 + ((((((3) << 6) | ((0) << 4 ) | ((1) << 2) | (2))) & 0xc0) >> 6)); }); | ||||||
301 | *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(hi), (__v8hi)_mm_setzero_si128(), 0, 1, 2, 3, 4 + ((((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0x03) >> 0), 4 + ((((((3) << 6) | ((0) << 4) | ( (1) << 2) | (2))) & 0x0c) >> 2), 4 + ((((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0x30) >> 4), 4 + ((((((3) << 6) | ((0) << 4 ) | ((1) << 2) | (2))) & 0xc0) >> 6)); }); | ||||||
302 | } | ||||||
303 | |||||||
304 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
305 | over_2x128 (__m128i* src_lo, | ||||||
306 | __m128i* src_hi, | ||||||
307 | __m128i* alpha_lo, | ||||||
308 | __m128i* alpha_hi, | ||||||
309 | __m128i* dst_lo, | ||||||
310 | __m128i* dst_hi) | ||||||
311 | { | ||||||
312 | __m128i t1, t2; | ||||||
313 | |||||||
314 | negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2); | ||||||
315 | |||||||
316 | pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi); | ||||||
317 | |||||||
318 | *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo); | ||||||
319 | *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi); | ||||||
320 | } | ||||||
321 | |||||||
322 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
323 | over_rev_non_pre_2x128 (__m128i src_lo, | ||||||
324 | __m128i src_hi, | ||||||
325 | __m128i* dst_lo, | ||||||
326 | __m128i* dst_hi) | ||||||
327 | { | ||||||
328 | __m128i lo, hi; | ||||||
329 | __m128i alpha_lo, alpha_hi; | ||||||
330 | |||||||
331 | expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi); | ||||||
332 | |||||||
333 | lo = _mm_or_si128 (alpha_lo, mask_alpha); | ||||||
334 | hi = _mm_or_si128 (alpha_hi, mask_alpha); | ||||||
335 | |||||||
336 | invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi); | ||||||
337 | |||||||
338 | pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi); | ||||||
339 | |||||||
340 | over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi); | ||||||
341 | } | ||||||
342 | |||||||
343 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
344 | in_over_2x128 (__m128i* src_lo, | ||||||
345 | __m128i* src_hi, | ||||||
346 | __m128i* alpha_lo, | ||||||
347 | __m128i* alpha_hi, | ||||||
348 | __m128i* mask_lo, | ||||||
349 | __m128i* mask_hi, | ||||||
350 | __m128i* dst_lo, | ||||||
351 | __m128i* dst_hi) | ||||||
352 | { | ||||||
353 | __m128i s_lo, s_hi; | ||||||
354 | __m128i a_lo, a_hi; | ||||||
355 | |||||||
356 | pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi); | ||||||
357 | pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi); | ||||||
358 | |||||||
359 | over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi); | ||||||
360 | } | ||||||
361 | |||||||
362 | /* load 4 pixels from a 16-byte boundary aligned address */ | ||||||
363 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
364 | load_128_aligned (__m128i* src) | ||||||
365 | { | ||||||
366 | return _mm_load_si128 (src); | ||||||
367 | } | ||||||
368 | |||||||
369 | /* load 4 pixels from a unaligned address */ | ||||||
370 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
371 | load_128_unaligned (const __m128i* src) | ||||||
372 | { | ||||||
373 | return _mm_loadu_si128 (src); | ||||||
374 | } | ||||||
375 | |||||||
376 | /* save 4 pixels using Write Combining memory on a 16-byte | ||||||
377 | * boundary aligned address | ||||||
378 | */ | ||||||
379 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
380 | save_128_write_combining (__m128i* dst, | ||||||
381 | __m128i data) | ||||||
382 | { | ||||||
383 | _mm_stream_si128 (dst, data); | ||||||
384 | } | ||||||
385 | |||||||
386 | /* save 4 pixels on a 16-byte boundary aligned address */ | ||||||
387 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
388 | save_128_aligned (__m128i* dst, | ||||||
389 | __m128i data) | ||||||
390 | { | ||||||
391 | _mm_store_si128 (dst, data); | ||||||
392 | } | ||||||
393 | |||||||
394 | /* save 4 pixels on a unaligned address */ | ||||||
395 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
396 | save_128_unaligned (__m128i* dst, | ||||||
397 | __m128i data) | ||||||
398 | { | ||||||
399 | _mm_storeu_si128 (dst, data); | ||||||
400 | } | ||||||
401 | |||||||
402 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
403 | load_32_1x128 (uint32_t data) | ||||||
404 | { | ||||||
405 | return _mm_cvtsi32_si128 (data); | ||||||
406 | } | ||||||
407 | |||||||
408 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
409 | expand_alpha_rev_1x128 (__m128i data) | ||||||
410 | { | ||||||
411 | return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (0, 0, 0, 0))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data), (__v8hi)_mm_setzero_si128(), ((((0) << 6) | (( 0) << 4) | ((0) << 2) | (0))) & 0x3, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc) >> 2, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x30) >> 4, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc0) >> 6, 4, 5, 6 , 7); }); | ||||||
412 | } | ||||||
413 | |||||||
414 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
415 | expand_pixel_8_1x128 (uint8_t data) | ||||||
416 | { | ||||||
417 | return _mm_shufflelo_epi16 (__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(unpack_32_1x128 ((uint32_t)data)), (__v8hi)_mm_setzero_si128 (), ((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x3, (((((0) << 6) | ((0) << 4) | (( 0) << 2) | (0))) & 0xc) >> 2, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x30) >> 4, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc0) >> 6, 4, 5, 6, 7); }) | ||||||
418 | unpack_32_1x128 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(unpack_32_1x128 ((uint32_t)data)), (__v8hi)_mm_setzero_si128 (), ((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x3, (((((0) << 6) | ((0) << 4) | (( 0) << 2) | (0))) & 0xc) >> 2, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0x30) >> 4, (((((0) << 6) | ((0) << 4) | ((0) << 2) | (0))) & 0xc0) >> 6, 4, 5, 6, 7); }); | ||||||
419 | } | ||||||
420 | |||||||
421 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
422 | pix_multiply_1x128 (__m128i data, | ||||||
423 | __m128i alpha) | ||||||
424 | { | ||||||
425 | return _mm_mulhi_epu16 (_mm_adds_epu16 (_mm_mullo_epi16 (data, alpha), | ||||||
426 | mask_0080), | ||||||
427 | mask_0101); | ||||||
428 | } | ||||||
429 | |||||||
430 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
431 | pix_add_multiply_1x128 (__m128i* src, | ||||||
432 | __m128i* alpha_dst, | ||||||
433 | __m128i* dst, | ||||||
434 | __m128i* alpha_src) | ||||||
435 | { | ||||||
436 | __m128i t1 = pix_multiply_1x128 (*src, *alpha_dst); | ||||||
437 | __m128i t2 = pix_multiply_1x128 (*dst, *alpha_src); | ||||||
438 | |||||||
439 | return _mm_adds_epu8 (t1, t2); | ||||||
440 | } | ||||||
441 | |||||||
442 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
443 | negate_1x128 (__m128i data) | ||||||
444 | { | ||||||
445 | return _mm_xor_si128 (data, mask_00ff); | ||||||
446 | } | ||||||
447 | |||||||
448 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
449 | invert_colors_1x128 (__m128i data) | ||||||
450 | { | ||||||
451 | return _mm_shufflelo_epi16 (data, _MM_SHUFFLE (3, 0, 1, 2))__extension__ ({ (__m128i)__builtin_shufflevector((__v8hi)(__m128i )(data), (__v8hi)_mm_setzero_si128(), ((((3) << 6) | (( 0) << 4) | ((1) << 2) | (2))) & 0x3, (((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0xc) >> 2, (((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0x30) >> 4, (((((3) << 6) | ((0) << 4) | ((1) << 2) | (2))) & 0xc0) >> 6, 4, 5, 6 , 7); }); | ||||||
452 | } | ||||||
453 | |||||||
454 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
455 | over_1x128 (__m128i src, __m128i alpha, __m128i dst) | ||||||
456 | { | ||||||
457 | return _mm_adds_epu8 (src, pix_multiply_1x128 (dst, negate_1x128 (alpha))); | ||||||
458 | } | ||||||
459 | |||||||
460 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
461 | in_over_1x128 (__m128i* src, __m128i* alpha, __m128i* mask, __m128i* dst) | ||||||
462 | { | ||||||
463 | return over_1x128 (pix_multiply_1x128 (*src, *mask), | ||||||
464 | pix_multiply_1x128 (*alpha, *mask), | ||||||
465 | *dst); | ||||||
466 | } | ||||||
467 | |||||||
468 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
469 | over_rev_non_pre_1x128 (__m128i src, __m128i dst) | ||||||
470 | { | ||||||
471 | __m128i alpha = expand_alpha_1x128 (src); | ||||||
472 | |||||||
473 | return over_1x128 (pix_multiply_1x128 (invert_colors_1x128 (src), | ||||||
474 | _mm_or_si128 (alpha, mask_alpha)), | ||||||
475 | alpha, | ||||||
476 | dst); | ||||||
477 | } | ||||||
478 | |||||||
479 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
480 | pack_1x128_32 (__m128i data) | ||||||
481 | { | ||||||
482 | return _mm_cvtsi128_si32 (_mm_packus_epi16 (data, _mm_setzero_si128 ())); | ||||||
483 | } | ||||||
484 | |||||||
485 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
486 | expand565_16_1x128 (uint16_t pixel) | ||||||
487 | { | ||||||
488 | __m128i m = _mm_cvtsi32_si128 (pixel); | ||||||
489 | |||||||
490 | m = unpack_565_to_8888 (m); | ||||||
491 | |||||||
492 | return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ()); | ||||||
493 | } | ||||||
494 | |||||||
495 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
496 | core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst) | ||||||
497 | { | ||||||
498 | uint8_t a; | ||||||
499 | __m128i xmms; | ||||||
500 | |||||||
501 | a = src >> 24; | ||||||
502 | |||||||
503 | if (a == 0xff) | ||||||
504 | { | ||||||
505 | return src; | ||||||
506 | } | ||||||
507 | else if (src) | ||||||
508 | { | ||||||
509 | xmms = unpack_32_1x128 (src); | ||||||
510 | return pack_1x128_32 ( | ||||||
511 | over_1x128 (xmms, expand_alpha_1x128 (xmms), | ||||||
512 | unpack_32_1x128 (dst))); | ||||||
513 | } | ||||||
514 | |||||||
515 | return dst; | ||||||
516 | } | ||||||
517 | |||||||
518 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
519 | combine1 (const uint32_t *ps, const uint32_t *pm) | ||||||
520 | { | ||||||
521 | uint32_t s = *ps; | ||||||
522 | |||||||
523 | if (pm) | ||||||
524 | { | ||||||
525 | __m128i ms, mm; | ||||||
526 | |||||||
527 | mm = unpack_32_1x128 (*pm); | ||||||
528 | mm = expand_alpha_1x128 (mm); | ||||||
529 | |||||||
530 | ms = unpack_32_1x128 (s); | ||||||
531 | ms = pix_multiply_1x128 (ms, mm); | ||||||
532 | |||||||
533 | s = pack_1x128_32 (ms); | ||||||
534 | } | ||||||
535 | |||||||
536 | return s; | ||||||
537 | } | ||||||
538 | |||||||
539 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
540 | combine4 (const __m128i *ps, const __m128i *pm) | ||||||
541 | { | ||||||
542 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
543 | __m128i xmm_msk_lo, xmm_msk_hi; | ||||||
544 | __m128i s; | ||||||
545 | |||||||
546 | if (pm) | ||||||
547 | { | ||||||
548 | xmm_msk_lo = load_128_unaligned (pm); | ||||||
549 | |||||||
550 | if (is_transparent (xmm_msk_lo)) | ||||||
551 | return _mm_setzero_si128 (); | ||||||
552 | } | ||||||
553 | |||||||
554 | s = load_128_unaligned (ps); | ||||||
555 | |||||||
556 | if (pm) | ||||||
557 | { | ||||||
558 | unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi); | ||||||
559 | unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi); | ||||||
560 | |||||||
561 | expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi); | ||||||
562 | |||||||
563 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
564 | &xmm_msk_lo, &xmm_msk_hi, | ||||||
565 | &xmm_src_lo, &xmm_src_hi); | ||||||
566 | |||||||
567 | s = pack_2x128_128 (xmm_src_lo, xmm_src_hi); | ||||||
568 | } | ||||||
569 | |||||||
570 | return s; | ||||||
571 | } | ||||||
572 | |||||||
573 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
574 | core_combine_over_u_sse2_mask (uint32_t * pd, | ||||||
575 | const uint32_t* ps, | ||||||
576 | const uint32_t* pm, | ||||||
577 | int w) | ||||||
578 | { | ||||||
579 | uint32_t s, d; | ||||||
580 | |||||||
581 | /* Align dst on a 16-byte boundary */ | ||||||
582 | while (w && ((uintptr_t)pd & 15)) | ||||||
583 | { | ||||||
584 | d = *pd; | ||||||
585 | s = combine1 (ps, pm); | ||||||
586 | |||||||
587 | if (s) | ||||||
588 | *pd = core_combine_over_u_pixel_sse2 (s, d); | ||||||
589 | pd++; | ||||||
590 | ps++; | ||||||
591 | pm++; | ||||||
592 | w--; | ||||||
593 | } | ||||||
594 | |||||||
595 | while (w >= 4) | ||||||
596 | { | ||||||
597 | __m128i mask = load_128_unaligned ((__m128i *)pm); | ||||||
598 | |||||||
599 | if (!is_zero (mask)) | ||||||
600 | { | ||||||
601 | __m128i src; | ||||||
602 | __m128i src_hi, src_lo; | ||||||
603 | __m128i mask_hi, mask_lo; | ||||||
604 | __m128i alpha_hi, alpha_lo; | ||||||
605 | |||||||
606 | src = load_128_unaligned ((__m128i *)ps); | ||||||
607 | |||||||
608 | if (is_opaque (_mm_and_si128 (src, mask))) | ||||||
609 | { | ||||||
610 | save_128_aligned ((__m128i *)pd, src); | ||||||
611 | } | ||||||
612 | else | ||||||
613 | { | ||||||
614 | __m128i dst = load_128_aligned ((__m128i *)pd); | ||||||
615 | __m128i dst_hi, dst_lo; | ||||||
616 | |||||||
617 | unpack_128_2x128 (mask, &mask_lo, &mask_hi); | ||||||
618 | unpack_128_2x128 (src, &src_lo, &src_hi); | ||||||
619 | |||||||
620 | expand_alpha_2x128 (mask_lo, mask_hi, &mask_lo, &mask_hi); | ||||||
621 | pix_multiply_2x128 (&src_lo, &src_hi, | ||||||
622 | &mask_lo, &mask_hi, | ||||||
623 | &src_lo, &src_hi); | ||||||
624 | |||||||
625 | unpack_128_2x128 (dst, &dst_lo, &dst_hi); | ||||||
626 | |||||||
627 | expand_alpha_2x128 (src_lo, src_hi, | ||||||
628 | &alpha_lo, &alpha_hi); | ||||||
629 | |||||||
630 | over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, | ||||||
631 | &dst_lo, &dst_hi); | ||||||
632 | |||||||
633 | save_128_aligned ( | ||||||
634 | (__m128i *)pd, | ||||||
635 | pack_2x128_128 (dst_lo, dst_hi)); | ||||||
636 | } | ||||||
637 | } | ||||||
638 | |||||||
639 | pm += 4; | ||||||
640 | ps += 4; | ||||||
641 | pd += 4; | ||||||
642 | w -= 4; | ||||||
643 | } | ||||||
644 | while (w) | ||||||
645 | { | ||||||
646 | d = *pd; | ||||||
647 | s = combine1 (ps, pm); | ||||||
648 | |||||||
649 | if (s) | ||||||
650 | *pd = core_combine_over_u_pixel_sse2 (s, d); | ||||||
651 | pd++; | ||||||
652 | ps++; | ||||||
653 | pm++; | ||||||
654 | |||||||
655 | w--; | ||||||
656 | } | ||||||
657 | } | ||||||
658 | |||||||
659 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
660 | core_combine_over_u_sse2_no_mask (uint32_t * pd, | ||||||
661 | const uint32_t* ps, | ||||||
662 | int w) | ||||||
663 | { | ||||||
664 | uint32_t s, d; | ||||||
665 | |||||||
666 | /* Align dst on a 16-byte boundary */ | ||||||
667 | while (w && ((uintptr_t)pd & 15)) | ||||||
668 | { | ||||||
669 | d = *pd; | ||||||
670 | s = *ps; | ||||||
671 | |||||||
672 | if (s) | ||||||
673 | *pd = core_combine_over_u_pixel_sse2 (s, d); | ||||||
674 | pd++; | ||||||
675 | ps++; | ||||||
676 | w--; | ||||||
677 | } | ||||||
678 | |||||||
679 | while (w >= 4) | ||||||
680 | { | ||||||
681 | __m128i src; | ||||||
682 | __m128i src_hi, src_lo, dst_hi, dst_lo; | ||||||
683 | __m128i alpha_hi, alpha_lo; | ||||||
684 | |||||||
685 | src = load_128_unaligned ((__m128i *)ps); | ||||||
686 | |||||||
687 | if (!is_zero (src)) | ||||||
688 | { | ||||||
689 | if (is_opaque (src)) | ||||||
690 | { | ||||||
691 | save_128_aligned ((__m128i *)pd, src); | ||||||
692 | } | ||||||
693 | else | ||||||
694 | { | ||||||
695 | __m128i dst = load_128_aligned ((__m128i *)pd); | ||||||
696 | |||||||
697 | unpack_128_2x128 (src, &src_lo, &src_hi); | ||||||
698 | unpack_128_2x128 (dst, &dst_lo, &dst_hi); | ||||||
699 | |||||||
700 | expand_alpha_2x128 (src_lo, src_hi, | ||||||
701 | &alpha_lo, &alpha_hi); | ||||||
702 | over_2x128 (&src_lo, &src_hi, &alpha_lo, &alpha_hi, | ||||||
703 | &dst_lo, &dst_hi); | ||||||
704 | |||||||
705 | save_128_aligned ( | ||||||
706 | (__m128i *)pd, | ||||||
707 | pack_2x128_128 (dst_lo, dst_hi)); | ||||||
708 | } | ||||||
709 | } | ||||||
710 | |||||||
711 | ps += 4; | ||||||
712 | pd += 4; | ||||||
713 | w -= 4; | ||||||
714 | } | ||||||
715 | while (w) | ||||||
716 | { | ||||||
717 | d = *pd; | ||||||
718 | s = *ps; | ||||||
719 | |||||||
720 | if (s) | ||||||
721 | *pd = core_combine_over_u_pixel_sse2 (s, d); | ||||||
722 | pd++; | ||||||
723 | ps++; | ||||||
724 | |||||||
725 | w--; | ||||||
726 | } | ||||||
727 | } | ||||||
728 | |||||||
729 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
730 | sse2_combine_over_u (pixman_implementation_t *imp, | ||||||
731 | pixman_op_t op, | ||||||
732 | uint32_t * pd, | ||||||
733 | const uint32_t * ps, | ||||||
734 | const uint32_t * pm, | ||||||
735 | int w) | ||||||
736 | { | ||||||
737 | if (pm) | ||||||
738 | core_combine_over_u_sse2_mask (pd, ps, pm, w); | ||||||
739 | else | ||||||
740 | core_combine_over_u_sse2_no_mask (pd, ps, w); | ||||||
741 | } | ||||||
742 | |||||||
743 | static void | ||||||
744 | sse2_combine_over_reverse_u (pixman_implementation_t *imp, | ||||||
745 | pixman_op_t op, | ||||||
746 | uint32_t * pd, | ||||||
747 | const uint32_t * ps, | ||||||
748 | const uint32_t * pm, | ||||||
749 | int w) | ||||||
750 | { | ||||||
751 | uint32_t s, d; | ||||||
752 | |||||||
753 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
754 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
755 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
756 | |||||||
757 | /* Align dst on a 16-byte boundary */ | ||||||
758 | while (w && | ||||||
759 | ((uintptr_t)pd & 15)) | ||||||
760 | { | ||||||
761 | d = *pd; | ||||||
762 | s = combine1 (ps, pm); | ||||||
763 | |||||||
764 | *pd++ = core_combine_over_u_pixel_sse2 (d, s); | ||||||
765 | w--; | ||||||
766 | ps++; | ||||||
767 | if (pm) | ||||||
768 | pm++; | ||||||
769 | } | ||||||
770 | |||||||
771 | while (w >= 4) | ||||||
772 | { | ||||||
773 | /* I'm loading unaligned because I'm not sure | ||||||
774 | * about the address alignment. | ||||||
775 | */ | ||||||
776 | xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); | ||||||
777 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
778 | |||||||
779 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
780 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
781 | |||||||
782 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
783 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
784 | |||||||
785 | over_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
786 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
787 | &xmm_src_lo, &xmm_src_hi); | ||||||
788 | |||||||
789 | /* rebuid the 4 pixel data and save*/ | ||||||
790 | save_128_aligned ((__m128i*)pd, | ||||||
791 | pack_2x128_128 (xmm_src_lo, xmm_src_hi)); | ||||||
792 | |||||||
793 | w -= 4; | ||||||
794 | ps += 4; | ||||||
795 | pd += 4; | ||||||
796 | |||||||
797 | if (pm) | ||||||
798 | pm += 4; | ||||||
799 | } | ||||||
800 | |||||||
801 | while (w) | ||||||
802 | { | ||||||
803 | d = *pd; | ||||||
804 | s = combine1 (ps, pm); | ||||||
805 | |||||||
806 | *pd++ = core_combine_over_u_pixel_sse2 (d, s); | ||||||
807 | ps++; | ||||||
808 | w--; | ||||||
809 | if (pm) | ||||||
810 | pm++; | ||||||
811 | } | ||||||
812 | } | ||||||
813 | |||||||
814 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
815 | core_combine_in_u_pixel_sse2 (uint32_t src, uint32_t dst) | ||||||
816 | { | ||||||
817 | uint32_t maska = src >> 24; | ||||||
818 | |||||||
819 | if (maska == 0) | ||||||
820 | { | ||||||
821 | return 0; | ||||||
822 | } | ||||||
823 | else if (maska != 0xff) | ||||||
824 | { | ||||||
825 | return pack_1x128_32 ( | ||||||
826 | pix_multiply_1x128 (unpack_32_1x128 (dst), | ||||||
827 | expand_alpha_1x128 (unpack_32_1x128 (src)))); | ||||||
828 | } | ||||||
829 | |||||||
830 | return dst; | ||||||
831 | } | ||||||
832 | |||||||
833 | static void | ||||||
834 | sse2_combine_in_u (pixman_implementation_t *imp, | ||||||
835 | pixman_op_t op, | ||||||
836 | uint32_t * pd, | ||||||
837 | const uint32_t * ps, | ||||||
838 | const uint32_t * pm, | ||||||
839 | int w) | ||||||
840 | { | ||||||
841 | uint32_t s, d; | ||||||
842 | |||||||
843 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
844 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
845 | |||||||
846 | while (w && ((uintptr_t)pd & 15)) | ||||||
847 | { | ||||||
848 | s = combine1 (ps, pm); | ||||||
849 | d = *pd; | ||||||
850 | |||||||
851 | *pd++ = core_combine_in_u_pixel_sse2 (d, s); | ||||||
852 | w--; | ||||||
853 | ps++; | ||||||
854 | if (pm) | ||||||
855 | pm++; | ||||||
856 | } | ||||||
857 | |||||||
858 | while (w >= 4) | ||||||
859 | { | ||||||
860 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
861 | xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm); | ||||||
862 | |||||||
863 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
864 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
865 | |||||||
866 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
867 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
868 | &xmm_dst_lo, &xmm_dst_hi, | ||||||
869 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
870 | |||||||
871 | save_128_aligned ((__m128i*)pd, | ||||||
872 | pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
873 | |||||||
874 | ps += 4; | ||||||
875 | pd += 4; | ||||||
876 | w -= 4; | ||||||
877 | if (pm) | ||||||
878 | pm += 4; | ||||||
879 | } | ||||||
880 | |||||||
881 | while (w) | ||||||
882 | { | ||||||
883 | s = combine1 (ps, pm); | ||||||
884 | d = *pd; | ||||||
885 | |||||||
886 | *pd++ = core_combine_in_u_pixel_sse2 (d, s); | ||||||
887 | w--; | ||||||
888 | ps++; | ||||||
889 | if (pm) | ||||||
890 | pm++; | ||||||
891 | } | ||||||
892 | } | ||||||
893 | |||||||
894 | static void | ||||||
895 | sse2_combine_in_reverse_u (pixman_implementation_t *imp, | ||||||
896 | pixman_op_t op, | ||||||
897 | uint32_t * pd, | ||||||
898 | const uint32_t * ps, | ||||||
899 | const uint32_t * pm, | ||||||
900 | int w) | ||||||
901 | { | ||||||
902 | uint32_t s, d; | ||||||
903 | |||||||
904 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
905 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
906 | |||||||
907 | while (w && ((uintptr_t)pd & 15)) | ||||||
908 | { | ||||||
909 | s = combine1 (ps, pm); | ||||||
910 | d = *pd; | ||||||
911 | |||||||
912 | *pd++ = core_combine_in_u_pixel_sse2 (s, d); | ||||||
913 | ps++; | ||||||
914 | w--; | ||||||
915 | if (pm) | ||||||
916 | pm++; | ||||||
917 | } | ||||||
918 | |||||||
919 | while (w >= 4) | ||||||
920 | { | ||||||
921 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
922 | xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); | ||||||
923 | |||||||
924 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
925 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
926 | |||||||
927 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
928 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
929 | &xmm_src_lo, &xmm_src_hi, | ||||||
930 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
931 | |||||||
932 | save_128_aligned ( | ||||||
933 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
934 | |||||||
935 | ps += 4; | ||||||
936 | pd += 4; | ||||||
937 | w -= 4; | ||||||
938 | if (pm) | ||||||
939 | pm += 4; | ||||||
940 | } | ||||||
941 | |||||||
942 | while (w) | ||||||
943 | { | ||||||
944 | s = combine1 (ps, pm); | ||||||
945 | d = *pd; | ||||||
946 | |||||||
947 | *pd++ = core_combine_in_u_pixel_sse2 (s, d); | ||||||
948 | w--; | ||||||
949 | ps++; | ||||||
950 | if (pm) | ||||||
951 | pm++; | ||||||
952 | } | ||||||
953 | } | ||||||
954 | |||||||
955 | static void | ||||||
956 | sse2_combine_out_reverse_u (pixman_implementation_t *imp, | ||||||
957 | pixman_op_t op, | ||||||
958 | uint32_t * pd, | ||||||
959 | const uint32_t * ps, | ||||||
960 | const uint32_t * pm, | ||||||
961 | int w) | ||||||
962 | { | ||||||
963 | while (w && ((uintptr_t)pd & 15)) | ||||||
964 | { | ||||||
965 | uint32_t s = combine1 (ps, pm); | ||||||
966 | uint32_t d = *pd; | ||||||
967 | |||||||
968 | *pd++ = pack_1x128_32 ( | ||||||
969 | pix_multiply_1x128 ( | ||||||
970 | unpack_32_1x128 (d), negate_1x128 ( | ||||||
971 | expand_alpha_1x128 (unpack_32_1x128 (s))))); | ||||||
972 | |||||||
973 | if (pm) | ||||||
974 | pm++; | ||||||
975 | ps++; | ||||||
976 | w--; | ||||||
977 | } | ||||||
978 | |||||||
979 | while (w >= 4) | ||||||
980 | { | ||||||
981 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
982 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
983 | |||||||
984 | xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); | ||||||
985 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
986 | |||||||
987 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
988 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
989 | |||||||
990 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
991 | negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
992 | |||||||
993 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
994 | &xmm_src_lo, &xmm_src_hi, | ||||||
995 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
996 | |||||||
997 | save_128_aligned ( | ||||||
998 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
999 | |||||||
1000 | ps += 4; | ||||||
1001 | pd += 4; | ||||||
1002 | if (pm) | ||||||
1003 | pm += 4; | ||||||
1004 | |||||||
1005 | w -= 4; | ||||||
1006 | } | ||||||
1007 | |||||||
1008 | while (w) | ||||||
1009 | { | ||||||
1010 | uint32_t s = combine1 (ps, pm); | ||||||
1011 | uint32_t d = *pd; | ||||||
1012 | |||||||
1013 | *pd++ = pack_1x128_32 ( | ||||||
1014 | pix_multiply_1x128 ( | ||||||
1015 | unpack_32_1x128 (d), negate_1x128 ( | ||||||
1016 | expand_alpha_1x128 (unpack_32_1x128 (s))))); | ||||||
1017 | ps++; | ||||||
1018 | if (pm) | ||||||
1019 | pm++; | ||||||
1020 | w--; | ||||||
1021 | } | ||||||
1022 | } | ||||||
1023 | |||||||
1024 | static void | ||||||
1025 | sse2_combine_out_u (pixman_implementation_t *imp, | ||||||
1026 | pixman_op_t op, | ||||||
1027 | uint32_t * pd, | ||||||
1028 | const uint32_t * ps, | ||||||
1029 | const uint32_t * pm, | ||||||
1030 | int w) | ||||||
1031 | { | ||||||
1032 | while (w && ((uintptr_t)pd & 15)) | ||||||
1033 | { | ||||||
1034 | uint32_t s = combine1 (ps, pm); | ||||||
1035 | uint32_t d = *pd; | ||||||
1036 | |||||||
1037 | *pd++ = pack_1x128_32 ( | ||||||
1038 | pix_multiply_1x128 ( | ||||||
1039 | unpack_32_1x128 (s), negate_1x128 ( | ||||||
1040 | expand_alpha_1x128 (unpack_32_1x128 (d))))); | ||||||
1041 | w--; | ||||||
1042 | ps++; | ||||||
1043 | if (pm) | ||||||
1044 | pm++; | ||||||
1045 | } | ||||||
1046 | |||||||
1047 | while (w >= 4) | ||||||
1048 | { | ||||||
1049 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1050 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1051 | |||||||
1052 | xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm); | ||||||
1053 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
1054 | |||||||
1055 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1056 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1057 | |||||||
1058 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1059 | negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1060 | |||||||
1061 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
1062 | &xmm_dst_lo, &xmm_dst_hi, | ||||||
1063 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1064 | |||||||
1065 | save_128_aligned ( | ||||||
1066 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1067 | |||||||
1068 | ps += 4; | ||||||
1069 | pd += 4; | ||||||
1070 | w -= 4; | ||||||
1071 | if (pm) | ||||||
1072 | pm += 4; | ||||||
1073 | } | ||||||
1074 | |||||||
1075 | while (w) | ||||||
1076 | { | ||||||
1077 | uint32_t s = combine1 (ps, pm); | ||||||
1078 | uint32_t d = *pd; | ||||||
1079 | |||||||
1080 | *pd++ = pack_1x128_32 ( | ||||||
1081 | pix_multiply_1x128 ( | ||||||
1082 | unpack_32_1x128 (s), negate_1x128 ( | ||||||
1083 | expand_alpha_1x128 (unpack_32_1x128 (d))))); | ||||||
1084 | w--; | ||||||
1085 | ps++; | ||||||
1086 | if (pm) | ||||||
1087 | pm++; | ||||||
1088 | } | ||||||
1089 | } | ||||||
1090 | |||||||
1091 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
1092 | core_combine_atop_u_pixel_sse2 (uint32_t src, | ||||||
1093 | uint32_t dst) | ||||||
1094 | { | ||||||
1095 | __m128i s = unpack_32_1x128 (src); | ||||||
1096 | __m128i d = unpack_32_1x128 (dst); | ||||||
1097 | |||||||
1098 | __m128i sa = negate_1x128 (expand_alpha_1x128 (s)); | ||||||
1099 | __m128i da = expand_alpha_1x128 (d); | ||||||
1100 | |||||||
1101 | return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); | ||||||
1102 | } | ||||||
1103 | |||||||
1104 | static void | ||||||
1105 | sse2_combine_atop_u (pixman_implementation_t *imp, | ||||||
1106 | pixman_op_t op, | ||||||
1107 | uint32_t * pd, | ||||||
1108 | const uint32_t * ps, | ||||||
1109 | const uint32_t * pm, | ||||||
1110 | int w) | ||||||
1111 | { | ||||||
1112 | uint32_t s, d; | ||||||
1113 | |||||||
1114 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1115 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1116 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | ||||||
1117 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | ||||||
1118 | |||||||
1119 | while (w && ((uintptr_t)pd & 15)) | ||||||
1120 | { | ||||||
1121 | s = combine1 (ps, pm); | ||||||
1122 | d = *pd; | ||||||
1123 | |||||||
1124 | *pd++ = core_combine_atop_u_pixel_sse2 (s, d); | ||||||
1125 | w--; | ||||||
1126 | ps++; | ||||||
1127 | if (pm) | ||||||
1128 | pm++; | ||||||
1129 | } | ||||||
1130 | |||||||
1131 | while (w >= 4) | ||||||
1132 | { | ||||||
1133 | xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); | ||||||
1134 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
1135 | |||||||
1136 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1137 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1138 | |||||||
1139 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
1140 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
1141 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
1142 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
1143 | |||||||
1144 | negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, | ||||||
1145 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
1146 | |||||||
1147 | pix_add_multiply_2x128 ( | ||||||
1148 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | ||||||
1149 | &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, | ||||||
1150 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1151 | |||||||
1152 | save_128_aligned ( | ||||||
1153 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1154 | |||||||
1155 | ps += 4; | ||||||
1156 | pd += 4; | ||||||
1157 | w -= 4; | ||||||
1158 | if (pm) | ||||||
1159 | pm += 4; | ||||||
1160 | } | ||||||
1161 | |||||||
1162 | while (w) | ||||||
1163 | { | ||||||
1164 | s = combine1 (ps, pm); | ||||||
1165 | d = *pd; | ||||||
1166 | |||||||
1167 | *pd++ = core_combine_atop_u_pixel_sse2 (s, d); | ||||||
1168 | w--; | ||||||
1169 | ps++; | ||||||
1170 | if (pm) | ||||||
1171 | pm++; | ||||||
1172 | } | ||||||
1173 | } | ||||||
1174 | |||||||
1175 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
1176 | core_combine_reverse_atop_u_pixel_sse2 (uint32_t src, | ||||||
1177 | uint32_t dst) | ||||||
1178 | { | ||||||
1179 | __m128i s = unpack_32_1x128 (src); | ||||||
1180 | __m128i d = unpack_32_1x128 (dst); | ||||||
1181 | |||||||
1182 | __m128i sa = expand_alpha_1x128 (s); | ||||||
1183 | __m128i da = negate_1x128 (expand_alpha_1x128 (d)); | ||||||
1184 | |||||||
1185 | return pack_1x128_32 (pix_add_multiply_1x128 (&s, &da, &d, &sa)); | ||||||
1186 | } | ||||||
1187 | |||||||
1188 | static void | ||||||
1189 | sse2_combine_atop_reverse_u (pixman_implementation_t *imp, | ||||||
1190 | pixman_op_t op, | ||||||
1191 | uint32_t * pd, | ||||||
1192 | const uint32_t * ps, | ||||||
1193 | const uint32_t * pm, | ||||||
1194 | int w) | ||||||
1195 | { | ||||||
1196 | uint32_t s, d; | ||||||
1197 | |||||||
1198 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1199 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1200 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | ||||||
1201 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | ||||||
1202 | |||||||
1203 | while (w && ((uintptr_t)pd & 15)) | ||||||
1204 | { | ||||||
1205 | s = combine1 (ps, pm); | ||||||
1206 | d = *pd; | ||||||
1207 | |||||||
1208 | *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); | ||||||
1209 | ps++; | ||||||
1210 | w--; | ||||||
1211 | if (pm) | ||||||
1212 | pm++; | ||||||
1213 | } | ||||||
1214 | |||||||
1215 | while (w >= 4) | ||||||
1216 | { | ||||||
1217 | xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm); | ||||||
1218 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
1219 | |||||||
1220 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1221 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1222 | |||||||
1223 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
1224 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
1225 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
1226 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
1227 | |||||||
1228 | negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, | ||||||
1229 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
1230 | |||||||
1231 | pix_add_multiply_2x128 ( | ||||||
1232 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | ||||||
1233 | &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, | ||||||
1234 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1235 | |||||||
1236 | save_128_aligned ( | ||||||
1237 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1238 | |||||||
1239 | ps += 4; | ||||||
1240 | pd += 4; | ||||||
1241 | w -= 4; | ||||||
1242 | if (pm) | ||||||
1243 | pm += 4; | ||||||
1244 | } | ||||||
1245 | |||||||
1246 | while (w) | ||||||
1247 | { | ||||||
1248 | s = combine1 (ps, pm); | ||||||
1249 | d = *pd; | ||||||
1250 | |||||||
1251 | *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d); | ||||||
1252 | ps++; | ||||||
1253 | w--; | ||||||
1254 | if (pm) | ||||||
1255 | pm++; | ||||||
1256 | } | ||||||
1257 | } | ||||||
1258 | |||||||
1259 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
1260 | core_combine_xor_u_pixel_sse2 (uint32_t src, | ||||||
1261 | uint32_t dst) | ||||||
1262 | { | ||||||
1263 | __m128i s = unpack_32_1x128 (src); | ||||||
1264 | __m128i d = unpack_32_1x128 (dst); | ||||||
1265 | |||||||
1266 | __m128i neg_d = negate_1x128 (expand_alpha_1x128 (d)); | ||||||
1267 | __m128i neg_s = negate_1x128 (expand_alpha_1x128 (s)); | ||||||
1268 | |||||||
1269 | return pack_1x128_32 (pix_add_multiply_1x128 (&s, &neg_d, &d, &neg_s)); | ||||||
1270 | } | ||||||
1271 | |||||||
1272 | static void | ||||||
1273 | sse2_combine_xor_u (pixman_implementation_t *imp, | ||||||
1274 | pixman_op_t op, | ||||||
1275 | uint32_t * dst, | ||||||
1276 | const uint32_t * src, | ||||||
1277 | const uint32_t * mask, | ||||||
1278 | int width) | ||||||
1279 | { | ||||||
1280 | int w = width; | ||||||
1281 | uint32_t s, d; | ||||||
1282 | uint32_t* pd = dst; | ||||||
1283 | const uint32_t* ps = src; | ||||||
1284 | const uint32_t* pm = mask; | ||||||
1285 | |||||||
1286 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
1287 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
1288 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | ||||||
1289 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | ||||||
1290 | |||||||
1291 | while (w && ((uintptr_t)pd & 15)) | ||||||
1292 | { | ||||||
1293 | s = combine1 (ps, pm); | ||||||
1294 | d = *pd; | ||||||
1295 | |||||||
1296 | *pd++ = core_combine_xor_u_pixel_sse2 (s, d); | ||||||
1297 | w--; | ||||||
1298 | ps++; | ||||||
1299 | if (pm) | ||||||
1300 | pm++; | ||||||
1301 | } | ||||||
1302 | |||||||
1303 | while (w >= 4) | ||||||
1304 | { | ||||||
1305 | xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm); | ||||||
1306 | xmm_dst = load_128_aligned ((__m128i*) pd); | ||||||
1307 | |||||||
1308 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
1309 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1310 | |||||||
1311 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
1312 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
1313 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
1314 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
1315 | |||||||
1316 | negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, | ||||||
1317 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
1318 | negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, | ||||||
1319 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
1320 | |||||||
1321 | pix_add_multiply_2x128 ( | ||||||
1322 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | ||||||
1323 | &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, | ||||||
1324 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1325 | |||||||
1326 | save_128_aligned ( | ||||||
1327 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1328 | |||||||
1329 | ps += 4; | ||||||
1330 | pd += 4; | ||||||
1331 | w -= 4; | ||||||
1332 | if (pm) | ||||||
1333 | pm += 4; | ||||||
1334 | } | ||||||
1335 | |||||||
1336 | while (w) | ||||||
1337 | { | ||||||
1338 | s = combine1 (ps, pm); | ||||||
1339 | d = *pd; | ||||||
1340 | |||||||
1341 | *pd++ = core_combine_xor_u_pixel_sse2 (s, d); | ||||||
1342 | w--; | ||||||
1343 | ps++; | ||||||
1344 | if (pm) | ||||||
1345 | pm++; | ||||||
1346 | } | ||||||
1347 | } | ||||||
1348 | |||||||
1349 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
1350 | sse2_combine_add_u (pixman_implementation_t *imp, | ||||||
1351 | pixman_op_t op, | ||||||
1352 | uint32_t * dst, | ||||||
1353 | const uint32_t * src, | ||||||
1354 | const uint32_t * mask, | ||||||
1355 | int width) | ||||||
1356 | { | ||||||
1357 | int w = width; | ||||||
1358 | uint32_t s, d; | ||||||
1359 | uint32_t* pd = dst; | ||||||
1360 | const uint32_t* ps = src; | ||||||
1361 | const uint32_t* pm = mask; | ||||||
1362 | |||||||
1363 | while (w && (uintptr_t)pd & 15) | ||||||
1364 | { | ||||||
1365 | s = combine1 (ps, pm); | ||||||
1366 | d = *pd; | ||||||
1367 | |||||||
1368 | ps++; | ||||||
1369 | if (pm) | ||||||
1370 | pm++; | ||||||
1371 | *pd++ = _mm_cvtsi128_si32 ( | ||||||
1372 | _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); | ||||||
1373 | w--; | ||||||
1374 | } | ||||||
1375 | |||||||
1376 | while (w >= 4) | ||||||
1377 | { | ||||||
1378 | __m128i s; | ||||||
1379 | |||||||
1380 | s = combine4 ((__m128i*)ps, (__m128i*)pm); | ||||||
1381 | |||||||
1382 | save_128_aligned ( | ||||||
1383 | (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd))); | ||||||
1384 | |||||||
1385 | pd += 4; | ||||||
1386 | ps += 4; | ||||||
1387 | if (pm) | ||||||
1388 | pm += 4; | ||||||
1389 | w -= 4; | ||||||
1390 | } | ||||||
1391 | |||||||
1392 | while (w--) | ||||||
1393 | { | ||||||
1394 | s = combine1 (ps, pm); | ||||||
1395 | d = *pd; | ||||||
1396 | |||||||
1397 | ps++; | ||||||
1398 | *pd++ = _mm_cvtsi128_si32 ( | ||||||
1399 | _mm_adds_epu8 (_mm_cvtsi32_si128 (s), _mm_cvtsi32_si128 (d))); | ||||||
1400 | if (pm) | ||||||
1401 | pm++; | ||||||
1402 | } | ||||||
1403 | } | ||||||
1404 | |||||||
1405 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
1406 | core_combine_saturate_u_pixel_sse2 (uint32_t src, | ||||||
1407 | uint32_t dst) | ||||||
1408 | { | ||||||
1409 | __m128i ms = unpack_32_1x128 (src); | ||||||
1410 | __m128i md = unpack_32_1x128 (dst); | ||||||
1411 | uint32_t sa = src >> 24; | ||||||
1412 | uint32_t da = ~dst >> 24; | ||||||
1413 | |||||||
1414 | if (sa > da) | ||||||
1415 | { | ||||||
1416 | ms = pix_multiply_1x128 ( | ||||||
1417 | ms, expand_alpha_1x128 (unpack_32_1x128 (DIV_UN8 (da, sa)(((uint16_t) (da) * 0xff + ((sa) / 2)) / (sa)) << 24))); | ||||||
1418 | } | ||||||
1419 | |||||||
1420 | return pack_1x128_32 (_mm_adds_epu16 (md, ms)); | ||||||
1421 | } | ||||||
1422 | |||||||
1423 | static void | ||||||
1424 | sse2_combine_saturate_u (pixman_implementation_t *imp, | ||||||
1425 | pixman_op_t op, | ||||||
1426 | uint32_t * pd, | ||||||
1427 | const uint32_t * ps, | ||||||
1428 | const uint32_t * pm, | ||||||
1429 | int w) | ||||||
1430 | { | ||||||
1431 | uint32_t s, d; | ||||||
1432 | |||||||
1433 | uint32_t pack_cmp; | ||||||
1434 | __m128i xmm_src, xmm_dst; | ||||||
1435 | |||||||
1436 | while (w && (uintptr_t)pd & 15) | ||||||
1437 | { | ||||||
1438 | s = combine1 (ps, pm); | ||||||
1439 | d = *pd; | ||||||
1440 | |||||||
1441 | *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); | ||||||
1442 | w--; | ||||||
1443 | ps++; | ||||||
1444 | if (pm) | ||||||
1445 | pm++; | ||||||
1446 | } | ||||||
1447 | |||||||
1448 | while (w >= 4) | ||||||
1449 | { | ||||||
1450 | xmm_dst = load_128_aligned ((__m128i*)pd); | ||||||
1451 | xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm); | ||||||
1452 | |||||||
1453 | pack_cmp = _mm_movemask_epi8 ( | ||||||
1454 | _mm_cmpgt_epi32 ( | ||||||
1455 | _mm_srli_epi32 (xmm_src, 24), | ||||||
1456 | _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24))); | ||||||
1457 | |||||||
1458 | /* if some alpha src is grater than respective ~alpha dst */ | ||||||
1459 | if (pack_cmp) | ||||||
1460 | { | ||||||
1461 | s = combine1 (ps++, pm); | ||||||
1462 | d = *pd; | ||||||
1463 | *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); | ||||||
1464 | if (pm) | ||||||
1465 | pm++; | ||||||
1466 | |||||||
1467 | s = combine1 (ps++, pm); | ||||||
1468 | d = *pd; | ||||||
1469 | *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); | ||||||
1470 | if (pm) | ||||||
1471 | pm++; | ||||||
1472 | |||||||
1473 | s = combine1 (ps++, pm); | ||||||
1474 | d = *pd; | ||||||
1475 | *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); | ||||||
1476 | if (pm) | ||||||
1477 | pm++; | ||||||
1478 | |||||||
1479 | s = combine1 (ps++, pm); | ||||||
1480 | d = *pd; | ||||||
1481 | *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); | ||||||
1482 | if (pm) | ||||||
1483 | pm++; | ||||||
1484 | } | ||||||
1485 | else | ||||||
1486 | { | ||||||
1487 | save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src)); | ||||||
1488 | |||||||
1489 | pd += 4; | ||||||
1490 | ps += 4; | ||||||
1491 | if (pm) | ||||||
1492 | pm += 4; | ||||||
1493 | } | ||||||
1494 | |||||||
1495 | w -= 4; | ||||||
1496 | } | ||||||
1497 | |||||||
1498 | while (w--) | ||||||
1499 | { | ||||||
1500 | s = combine1 (ps, pm); | ||||||
1501 | d = *pd; | ||||||
1502 | |||||||
1503 | *pd++ = core_combine_saturate_u_pixel_sse2 (s, d); | ||||||
1504 | ps++; | ||||||
1505 | if (pm) | ||||||
1506 | pm++; | ||||||
1507 | } | ||||||
1508 | } | ||||||
1509 | |||||||
1510 | static void | ||||||
1511 | sse2_combine_src_ca (pixman_implementation_t *imp, | ||||||
1512 | pixman_op_t op, | ||||||
1513 | uint32_t * pd, | ||||||
1514 | const uint32_t * ps, | ||||||
1515 | const uint32_t * pm, | ||||||
1516 | int w) | ||||||
1517 | { | ||||||
1518 | uint32_t s, m; | ||||||
1519 | |||||||
1520 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1521 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
1522 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1523 | |||||||
1524 | while (w && (uintptr_t)pd & 15) | ||||||
1525 | { | ||||||
1526 | s = *ps++; | ||||||
1527 | m = *pm++; | ||||||
1528 | *pd++ = pack_1x128_32 ( | ||||||
1529 | pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); | ||||||
1530 | w--; | ||||||
1531 | } | ||||||
1532 | |||||||
1533 | while (w >= 4) | ||||||
1534 | { | ||||||
1535 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
1536 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
1537 | |||||||
1538 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1539 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
1540 | |||||||
1541 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
1542 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
1543 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1544 | |||||||
1545 | save_128_aligned ( | ||||||
1546 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1547 | |||||||
1548 | ps += 4; | ||||||
1549 | pd += 4; | ||||||
1550 | pm += 4; | ||||||
1551 | w -= 4; | ||||||
1552 | } | ||||||
1553 | |||||||
1554 | while (w) | ||||||
1555 | { | ||||||
1556 | s = *ps++; | ||||||
1557 | m = *pm++; | ||||||
1558 | *pd++ = pack_1x128_32 ( | ||||||
1559 | pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m))); | ||||||
1560 | w--; | ||||||
1561 | } | ||||||
1562 | } | ||||||
1563 | |||||||
/*
 * Single-pixel helper used by sse2_combine_over_ca.  Unpacks src, mask and
 * dst with unpack_32_1x128, derives expAlpha from the unpacked source via
 * expand_alpha_1x128, and returns the repacked result of
 * in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst).
 *
 * NOTE(review): "force_inline__inline__ __attribute__ (...)" looks like the
 * force_inline macro name printed together with its expansion - confirm
 * against the real source before compiling this text.
 */
1564 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
1565 | core_combine_over_ca_pixel_sse2 (uint32_t src, | ||||||
1566 | uint32_t mask, | ||||||
1567 | uint32_t dst) | ||||||
1568 | { | ||||||
1569 | __m128i s = unpack_32_1x128 (src); | ||||||
1570 | __m128i expAlpha = expand_alpha_1x128 (s); | ||||||
1571 | __m128i unpk_mask = unpack_32_1x128 (mask); | ||||||
1572 | __m128i unpk_dst = unpack_32_1x128 (dst); | ||||||
1573 | |||||||
1574 | return pack_1x128_32 (in_over_1x128 (&s, &expAlpha, &unpk_mask, &unpk_dst)); | ||||||
1575 | } | ||||||
1576 | |||||||
/*
 * sse2_combine_over_ca: combines w source/mask pixels into pd.  Scalar
 * pixels go through core_combine_over_ca_pixel_sse2 (s, m, d); the vector
 * loop performs the matching in_over_2x128 on four pixels at once.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop each
 * xmm_*_hi first holds the whole loaded vector before unpack_128_2x128
 * splits it into _lo/_hi.  The value stored afterwards is
 * xmm_dst_lo/xmm_dst_hi, so in_over_2x128 presumably writes its result
 * through the last pointer pair - confirm against its definition.
 */
1577 | static void | ||||||
1578 | sse2_combine_over_ca (pixman_implementation_t *imp, | ||||||
1579 | pixman_op_t op, | ||||||
1580 | uint32_t * pd, | ||||||
1581 | const uint32_t * ps, | ||||||
1582 | const uint32_t * pm, | ||||||
1583 | int w) | ||||||
1584 | { | ||||||
1585 | uint32_t s, m, d; | ||||||
1586 | |||||||
1587 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
1588 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1589 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1590 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
1591 | |||||||
1592 | while (w && (uintptr_t)pd & 15) | ||||||
1593 | { | ||||||
1594 | s = *ps++; | ||||||
1595 | m = *pm++; | ||||||
1596 | d = *pd; | ||||||
1597 | |||||||
1598 | *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); | ||||||
1599 | w--; | ||||||
1600 | } | ||||||
1601 | |||||||
1602 | while (w >= 4) | ||||||
1603 | { | ||||||
1604 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
1605 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
1606 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
1607 | |||||||
1608 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1609 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1610 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
1611 | |||||||
1612 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
1613 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1614 | |||||||
1615 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
1616 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
1617 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
1618 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1619 | |||||||
1620 | save_128_aligned ( | ||||||
1621 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1622 | |||||||
1623 | ps += 4; | ||||||
1624 | pd += 4; | ||||||
1625 | pm += 4; | ||||||
1626 | w -= 4; | ||||||
1627 | } | ||||||
1628 | |||||||
1629 | while (w) | ||||||
1630 | { | ||||||
1631 | s = *ps++; | ||||||
1632 | m = *pm++; | ||||||
1633 | d = *pd; | ||||||
1634 | |||||||
1635 | *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d); | ||||||
1636 | w--; | ||||||
1637 | } | ||||||
1638 | } | ||||||
1639 | |||||||
/*
 * Single-pixel helper used by sse2_combine_over_reverse_ca.  Unpacks dst
 * into d and returns the repacked result of
 * over_1x128 (d, expand_alpha_1x128 (d), pix_multiply_1x128 (src, mask)),
 * i.e. the destination takes the "source" role of over_1x128 and the
 * mask-multiplied source takes the "destination" role.
 *
 * NOTE(review): "force_inline__inline__ __attribute__ (...)" looks like the
 * force_inline macro name printed together with its expansion - confirm
 * against the real source before compiling this text.
 */
1640 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
1641 | core_combine_over_reverse_ca_pixel_sse2 (uint32_t src, | ||||||
1642 | uint32_t mask, | ||||||
1643 | uint32_t dst) | ||||||
1644 | { | ||||||
1645 | __m128i d = unpack_32_1x128 (dst); | ||||||
1646 | |||||||
1647 | return pack_1x128_32 ( | ||||||
1648 | over_1x128 (d, expand_alpha_1x128 (d), | ||||||
1649 | pix_multiply_1x128 (unpack_32_1x128 (src), | ||||||
1650 | unpack_32_1x128 (mask)))); | ||||||
1651 | } | ||||||
1652 | |||||||
/*
 * sse2_combine_over_reverse_ca: combines w source/mask pixels into pd.
 * Scalar pixels go through core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
 * the vector loop does the same work on four pixels at once.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop the
 * alpha is expanded from the destination, xmm_mask_lo/hi is overwritten with
 * pix_multiply (src, mask), and over_2x128 is called with the destination
 * first and that product last.  The stored value is taken from
 * xmm_mask_lo/xmm_mask_hi, so over_2x128 presumably writes its result
 * through the last pointer pair - confirm against its definition.
 */
1653 | static void | ||||||
1654 | sse2_combine_over_reverse_ca (pixman_implementation_t *imp, | ||||||
1655 | pixman_op_t op, | ||||||
1656 | uint32_t * pd, | ||||||
1657 | const uint32_t * ps, | ||||||
1658 | const uint32_t * pm, | ||||||
1659 | int w) | ||||||
1660 | { | ||||||
1661 | uint32_t s, m, d; | ||||||
1662 | |||||||
1663 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
1664 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1665 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1666 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
1667 | |||||||
1668 | while (w && (uintptr_t)pd & 15) | ||||||
1669 | { | ||||||
1670 | s = *ps++; | ||||||
1671 | m = *pm++; | ||||||
1672 | d = *pd; | ||||||
1673 | |||||||
1674 | *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); | ||||||
1675 | w--; | ||||||
1676 | } | ||||||
1677 | |||||||
1678 | while (w >= 4) | ||||||
1679 | { | ||||||
1680 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
1681 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
1682 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
1683 | |||||||
1684 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1685 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1686 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
1687 | |||||||
1688 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
1689 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1690 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
1691 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
1692 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
1693 | |||||||
1694 | over_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
1695 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
1696 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
1697 | |||||||
1698 | save_128_aligned ( | ||||||
1699 | (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); | ||||||
1700 | |||||||
1701 | ps += 4; | ||||||
1702 | pd += 4; | ||||||
1703 | pm += 4; | ||||||
1704 | w -= 4; | ||||||
1705 | } | ||||||
1706 | |||||||
1707 | while (w) | ||||||
1708 | { | ||||||
1709 | s = *ps++; | ||||||
1710 | m = *pm++; | ||||||
1711 | d = *pd; | ||||||
1712 | |||||||
1713 | *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d); | ||||||
1714 | w--; | ||||||
1715 | } | ||||||
1716 | } | ||||||
1717 | |||||||
/*
 * sse2_combine_in_ca: for each of the w pixels, replaces the destination
 * with pix_multiply (pix_multiply (s, m), expand_alpha (d)), computed on
 * unpacked values and repacked before the store.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop the
 * destination alpha is expanded into xmm_alpha_* BEFORE xmm_dst_* is
 * overwritten with src * mask, so the second multiply still sees the
 * original destination alpha.
 */
1718 | static void | ||||||
1719 | sse2_combine_in_ca (pixman_implementation_t *imp, | ||||||
1720 | pixman_op_t op, | ||||||
1721 | uint32_t * pd, | ||||||
1722 | const uint32_t * ps, | ||||||
1723 | const uint32_t * pm, | ||||||
1724 | int w) | ||||||
1725 | { | ||||||
1726 | uint32_t s, m, d; | ||||||
1727 | |||||||
1728 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
1729 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1730 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1731 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
1732 | |||||||
1733 | while (w && (uintptr_t)pd & 15) | ||||||
1734 | { | ||||||
1735 | s = *ps++; | ||||||
1736 | m = *pm++; | ||||||
1737 | d = *pd; | ||||||
1738 | |||||||
1739 | *pd++ = pack_1x128_32 ( | ||||||
1740 | pix_multiply_1x128 ( | ||||||
1741 | pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (m)), | ||||||
1742 | expand_alpha_1x128 (unpack_32_1x128 (d)))); | ||||||
1743 | |||||||
1744 | w--; | ||||||
1745 | } | ||||||
1746 | |||||||
1747 | while (w >= 4) | ||||||
1748 | { | ||||||
1749 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
1750 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
1751 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
1752 | |||||||
1753 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1754 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1755 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
1756 | |||||||
1757 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
1758 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1759 | |||||||
1760 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
1761 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
1762 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1763 | |||||||
1764 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
1765 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
1766 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1767 | |||||||
1768 | save_128_aligned ( | ||||||
1769 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1770 | |||||||
1771 | ps += 4; | ||||||
1772 | pd += 4; | ||||||
1773 | pm += 4; | ||||||
1774 | w -= 4; | ||||||
1775 | } | ||||||
1776 | |||||||
1777 | while (w) | ||||||
1778 | { | ||||||
1779 | s = *ps++; | ||||||
1780 | m = *pm++; | ||||||
1781 | d = *pd; | ||||||
1782 | |||||||
1783 | *pd++ = pack_1x128_32 ( | ||||||
1784 | pix_multiply_1x128 ( | ||||||
1785 | pix_multiply_1x128 ( | ||||||
1786 | unpack_32_1x128 (s), unpack_32_1x128 (m)), | ||||||
1787 | expand_alpha_1x128 (unpack_32_1x128 (d)))); | ||||||
1788 | |||||||
1789 | w--; | ||||||
1790 | } | ||||||
1791 | } | ||||||
1792 | |||||||
/*
 * sse2_combine_in_reverse_ca: for each of the w pixels, replaces the
 * destination with pix_multiply (d, pix_multiply (m, expand_alpha (s))),
 * computed on unpacked values and repacked before the store.  Only the
 * expanded alpha of the source is used; its colour channels do not enter
 * the result.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop
 * xmm_alpha_* is reused in place: first the expanded source alpha, then
 * mask * alpha.
 */
1793 | static void | ||||||
1794 | sse2_combine_in_reverse_ca (pixman_implementation_t *imp, | ||||||
1795 | pixman_op_t op, | ||||||
1796 | uint32_t * pd, | ||||||
1797 | const uint32_t * ps, | ||||||
1798 | const uint32_t * pm, | ||||||
1799 | int w) | ||||||
1800 | { | ||||||
1801 | uint32_t s, m, d; | ||||||
1802 | |||||||
1803 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
1804 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1805 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1806 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
1807 | |||||||
1808 | while (w && (uintptr_t)pd & 15) | ||||||
1809 | { | ||||||
1810 | s = *ps++; | ||||||
1811 | m = *pm++; | ||||||
1812 | d = *pd; | ||||||
1813 | |||||||
1814 | *pd++ = pack_1x128_32 ( | ||||||
1815 | pix_multiply_1x128 ( | ||||||
1816 | unpack_32_1x128 (d), | ||||||
1817 | pix_multiply_1x128 (unpack_32_1x128 (m), | ||||||
1818 | expand_alpha_1x128 (unpack_32_1x128 (s))))); | ||||||
1819 | w--; | ||||||
1820 | } | ||||||
1821 | |||||||
1822 | while (w >= 4) | ||||||
1823 | { | ||||||
1824 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
1825 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
1826 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
1827 | |||||||
1828 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1829 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1830 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
1831 | |||||||
1832 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
1833 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1834 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | ||||||
1835 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
1836 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1837 | |||||||
1838 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
1839 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
1840 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1841 | |||||||
1842 | save_128_aligned ( | ||||||
1843 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1844 | |||||||
1845 | ps += 4; | ||||||
1846 | pd += 4; | ||||||
1847 | pm += 4; | ||||||
1848 | w -= 4; | ||||||
1849 | } | ||||||
1850 | |||||||
1851 | while (w) | ||||||
1852 | { | ||||||
1853 | s = *ps++; | ||||||
1854 | m = *pm++; | ||||||
1855 | d = *pd; | ||||||
1856 | |||||||
1857 | *pd++ = pack_1x128_32 ( | ||||||
1858 | pix_multiply_1x128 ( | ||||||
1859 | unpack_32_1x128 (d), | ||||||
1860 | pix_multiply_1x128 (unpack_32_1x128 (m), | ||||||
1861 | expand_alpha_1x128 (unpack_32_1x128 (s))))); | ||||||
1862 | w--; | ||||||
1863 | } | ||||||
1864 | } | ||||||
1865 | |||||||
/*
 * sse2_combine_out_ca: for each of the w pixels, replaces the destination
 * with pix_multiply (pix_multiply (s, m), negate (expand_alpha (d))),
 * computed on unpacked values and repacked before the store.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop the
 * negated destination alpha is captured in xmm_alpha_* before xmm_dst_* is
 * overwritten with src * mask.
 */
1866 | static void | ||||||
1867 | sse2_combine_out_ca (pixman_implementation_t *imp, | ||||||
1868 | pixman_op_t op, | ||||||
1869 | uint32_t * pd, | ||||||
1870 | const uint32_t * ps, | ||||||
1871 | const uint32_t * pm, | ||||||
1872 | int w) | ||||||
1873 | { | ||||||
1874 | uint32_t s, m, d; | ||||||
1875 | |||||||
1876 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
1877 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1878 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1879 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
1880 | |||||||
1881 | while (w && (uintptr_t)pd & 15) | ||||||
1882 | { | ||||||
1883 | s = *ps++; | ||||||
1884 | m = *pm++; | ||||||
1885 | d = *pd; | ||||||
1886 | |||||||
1887 | *pd++ = pack_1x128_32 ( | ||||||
1888 | pix_multiply_1x128 ( | ||||||
1889 | pix_multiply_1x128 ( | ||||||
1890 | unpack_32_1x128 (s), unpack_32_1x128 (m)), | ||||||
1891 | negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); | ||||||
1892 | w--; | ||||||
1893 | } | ||||||
1894 | |||||||
1895 | while (w >= 4) | ||||||
1896 | { | ||||||
1897 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
1898 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
1899 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
1900 | |||||||
1901 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1902 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1903 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
1904 | |||||||
1905 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
1906 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1907 | negate_2x128 (xmm_alpha_lo, xmm_alpha_hi, | ||||||
1908 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1909 | |||||||
1910 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
1911 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
1912 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1913 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
1914 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
1915 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1916 | |||||||
1917 | save_128_aligned ( | ||||||
1918 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1919 | |||||||
1920 | ps += 4; | ||||||
1921 | pd += 4; | ||||||
1922 | pm += 4; | ||||||
1923 | w -= 4; | ||||||
1924 | } | ||||||
1925 | |||||||
1926 | while (w) | ||||||
1927 | { | ||||||
1928 | s = *ps++; | ||||||
1929 | m = *pm++; | ||||||
1930 | d = *pd; | ||||||
1931 | |||||||
1932 | *pd++ = pack_1x128_32 ( | ||||||
1933 | pix_multiply_1x128 ( | ||||||
1934 | pix_multiply_1x128 ( | ||||||
1935 | unpack_32_1x128 (s), unpack_32_1x128 (m)), | ||||||
1936 | negate_1x128 (expand_alpha_1x128 (unpack_32_1x128 (d))))); | ||||||
1937 | |||||||
1938 | w--; | ||||||
1939 | } | ||||||
1940 | } | ||||||
1941 | |||||||
/*
 * sse2_combine_out_reverse_ca: for each of the w pixels, replaces the
 * destination with
 * pix_multiply (d, negate (pix_multiply (m, expand_alpha (s)))),
 * computed on unpacked values and repacked before the store.  Only the
 * expanded alpha of the source is used; its colour channels do not enter
 * the result.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop
 * xmm_mask_* is reused in place: mask, then mask * source alpha, then the
 * negation of that product.
 */
1942 | static void | ||||||
1943 | sse2_combine_out_reverse_ca (pixman_implementation_t *imp, | ||||||
1944 | pixman_op_t op, | ||||||
1945 | uint32_t * pd, | ||||||
1946 | const uint32_t * ps, | ||||||
1947 | const uint32_t * pm, | ||||||
1948 | int w) | ||||||
1949 | { | ||||||
1950 | uint32_t s, m, d; | ||||||
1951 | |||||||
1952 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
1953 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
1954 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
1955 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
1956 | |||||||
1957 | while (w && (uintptr_t)pd & 15) | ||||||
1958 | { | ||||||
1959 | s = *ps++; | ||||||
1960 | m = *pm++; | ||||||
1961 | d = *pd; | ||||||
1962 | |||||||
1963 | *pd++ = pack_1x128_32 ( | ||||||
1964 | pix_multiply_1x128 ( | ||||||
1965 | unpack_32_1x128 (d), | ||||||
1966 | negate_1x128 (pix_multiply_1x128 ( | ||||||
1967 | unpack_32_1x128 (m), | ||||||
1968 | expand_alpha_1x128 (unpack_32_1x128 (s)))))); | ||||||
1969 | w--; | ||||||
1970 | } | ||||||
1971 | |||||||
1972 | while (w >= 4) | ||||||
1973 | { | ||||||
1974 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
1975 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
1976 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
1977 | |||||||
1978 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
1979 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
1980 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
1981 | |||||||
1982 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
1983 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
1984 | |||||||
1985 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | ||||||
1986 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
1987 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
1988 | |||||||
1989 | negate_2x128 (xmm_mask_lo, xmm_mask_hi, | ||||||
1990 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
1991 | |||||||
1992 | pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
1993 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
1994 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
1995 | |||||||
1996 | save_128_aligned ( | ||||||
1997 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
1998 | |||||||
1999 | ps += 4; | ||||||
2000 | pd += 4; | ||||||
2001 | pm += 4; | ||||||
2002 | w -= 4; | ||||||
2003 | } | ||||||
2004 | |||||||
2005 | while (w) | ||||||
2006 | { | ||||||
2007 | s = *ps++; | ||||||
2008 | m = *pm++; | ||||||
2009 | d = *pd; | ||||||
2010 | |||||||
2011 | *pd++ = pack_1x128_32 ( | ||||||
2012 | pix_multiply_1x128 ( | ||||||
2013 | unpack_32_1x128 (d), | ||||||
2014 | negate_1x128 (pix_multiply_1x128 ( | ||||||
2015 | unpack_32_1x128 (m), | ||||||
2016 | expand_alpha_1x128 (unpack_32_1x128 (s)))))); | ||||||
2017 | w--; | ||||||
2018 | } | ||||||
2019 | } | ||||||
2020 | |||||||
/*
 * Single-pixel helper used by sse2_combine_atop_ca.  After unpacking, it
 * captures the expanded source alpha (sa) and destination alpha (da), then
 * rewrites s as s * m and m as negate (m * sa).  sa is taken before s is
 * overwritten.  The return value is the repacked result of
 * pix_add_multiply_1x128 (&d, &m, &s, &da) - presumably d * m + s * da per
 * channel; confirm against pix_add_multiply_1x128.
 *
 * NOTE(review): "force_inline__inline__ __attribute__ (...)" looks like the
 * force_inline macro name printed together with its expansion - confirm
 * against the real source before compiling this text.
 */
2021 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
2022 | core_combine_atop_ca_pixel_sse2 (uint32_t src, | ||||||
2023 | uint32_t mask, | ||||||
2024 | uint32_t dst) | ||||||
2025 | { | ||||||
2026 | __m128i m = unpack_32_1x128 (mask); | ||||||
2027 | __m128i s = unpack_32_1x128 (src); | ||||||
2028 | __m128i d = unpack_32_1x128 (dst); | ||||||
2029 | __m128i sa = expand_alpha_1x128 (s); | ||||||
2030 | __m128i da = expand_alpha_1x128 (d); | ||||||
2031 | |||||||
2032 | s = pix_multiply_1x128 (s, m); | ||||||
2033 | m = negate_1x128 (pix_multiply_1x128 (m, sa)); | ||||||
2034 | |||||||
2035 | return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); | ||||||
2036 | } | ||||||
2037 | |||||||
/*
 * sse2_combine_atop_ca: combines w source/mask pixels into pd.  Scalar
 * pixels go through core_combine_atop_ca_pixel_sse2 (s, m, d); the vector
 * loop performs the same sequence of steps on four pixels at once.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop both
 * alphas are expanded before xmm_src_* and xmm_mask_* are overwritten:
 * src becomes src * mask, mask becomes negate (mask * source alpha), and
 * pix_add_multiply_2x128 pairs dst with that mask term and src with the
 * destination alpha, leaving the result in xmm_dst_lo/xmm_dst_hi.
 */
2038 | static void | ||||||
2039 | sse2_combine_atop_ca (pixman_implementation_t *imp, | ||||||
2040 | pixman_op_t op, | ||||||
2041 | uint32_t * pd, | ||||||
2042 | const uint32_t * ps, | ||||||
2043 | const uint32_t * pm, | ||||||
2044 | int w) | ||||||
2045 | { | ||||||
2046 | uint32_t s, m, d; | ||||||
2047 | |||||||
2048 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
2049 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
2050 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | ||||||
2051 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | ||||||
2052 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
2053 | |||||||
2054 | while (w && (uintptr_t)pd & 15) | ||||||
2055 | { | ||||||
2056 | s = *ps++; | ||||||
2057 | m = *pm++; | ||||||
2058 | d = *pd; | ||||||
2059 | |||||||
2060 | *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); | ||||||
2061 | w--; | ||||||
2062 | } | ||||||
2063 | |||||||
2064 | while (w >= 4) | ||||||
2065 | { | ||||||
2066 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
2067 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
2068 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
2069 | |||||||
2070 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
2071 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
2072 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
2073 | |||||||
2074 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
2075 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
2076 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
2077 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
2078 | |||||||
2079 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
2080 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
2081 | &xmm_src_lo, &xmm_src_hi); | ||||||
2082 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | ||||||
2083 | &xmm_alpha_src_lo, &xmm_alpha_src_hi, | ||||||
2084 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
2085 | |||||||
2086 | negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
2087 | |||||||
2088 | pix_add_multiply_2x128 ( | ||||||
2089 | &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, | ||||||
2090 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | ||||||
2091 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
2092 | |||||||
2093 | save_128_aligned ( | ||||||
2094 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
2095 | |||||||
2096 | ps += 4; | ||||||
2097 | pd += 4; | ||||||
2098 | pm += 4; | ||||||
2099 | w -= 4; | ||||||
2100 | } | ||||||
2101 | |||||||
2102 | while (w) | ||||||
2103 | { | ||||||
2104 | s = *ps++; | ||||||
2105 | m = *pm++; | ||||||
2106 | d = *pd; | ||||||
2107 | |||||||
2108 | *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d); | ||||||
2109 | w--; | ||||||
2110 | } | ||||||
2111 | } | ||||||
2112 | |||||||
/*
 * Single-pixel helper used by sse2_combine_atop_reverse_ca.  After
 * unpacking, da holds the NEGATED expanded destination alpha and sa the
 * expanded source alpha (taken before s is overwritten).  s is rewritten as
 * s * m and m as m * sa.  The return value is the repacked result of
 * pix_add_multiply_1x128 (&d, &m, &s, &da) - presumably d * m + s * da per
 * channel; confirm against pix_add_multiply_1x128.
 *
 * NOTE(review): "force_inline__inline__ __attribute__ (...)" looks like the
 * force_inline macro name printed together with its expansion - confirm
 * against the real source before compiling this text.
 */
2113 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
2114 | core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, | ||||||
2115 | uint32_t mask, | ||||||
2116 | uint32_t dst) | ||||||
2117 | { | ||||||
2118 | __m128i m = unpack_32_1x128 (mask); | ||||||
2119 | __m128i s = unpack_32_1x128 (src); | ||||||
2120 | __m128i d = unpack_32_1x128 (dst); | ||||||
2121 | |||||||
2122 | __m128i da = negate_1x128 (expand_alpha_1x128 (d)); | ||||||
2123 | __m128i sa = expand_alpha_1x128 (s); | ||||||
2124 | |||||||
2125 | s = pix_multiply_1x128 (s, m); | ||||||
2126 | m = pix_multiply_1x128 (m, sa); | ||||||
2127 | |||||||
2128 | return pack_1x128_32 (pix_add_multiply_1x128 (&d, &m, &s, &da)); | ||||||
2129 | } | ||||||
2130 | |||||||
/*
 * sse2_combine_atop_reverse_ca: combines w source/mask pixels into pd.
 * Scalar pixels go through core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
 * the vector loop performs the same sequence of steps on four pixels at once.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop both
 * alphas are expanded before xmm_src_* and xmm_mask_* are overwritten:
 * src becomes src * mask, mask becomes mask * source alpha, the destination
 * alpha is negated, and pix_add_multiply_2x128 pairs dst with the mask term
 * and src with the negated destination alpha, leaving the result in
 * xmm_dst_lo/xmm_dst_hi.
 */
2131 | static void | ||||||
2132 | sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, | ||||||
2133 | pixman_op_t op, | ||||||
2134 | uint32_t * pd, | ||||||
2135 | const uint32_t * ps, | ||||||
2136 | const uint32_t * pm, | ||||||
2137 | int w) | ||||||
2138 | { | ||||||
2139 | uint32_t s, m, d; | ||||||
2140 | |||||||
2141 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
2142 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
2143 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | ||||||
2144 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | ||||||
2145 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
2146 | |||||||
2147 | while (w && (uintptr_t)pd & 15) | ||||||
2148 | { | ||||||
2149 | s = *ps++; | ||||||
2150 | m = *pm++; | ||||||
2151 | d = *pd; | ||||||
2152 | |||||||
2153 | *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); | ||||||
2154 | w--; | ||||||
2155 | } | ||||||
2156 | |||||||
2157 | while (w >= 4) | ||||||
2158 | { | ||||||
2159 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
2160 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
2161 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
2162 | |||||||
2163 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
2164 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
2165 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
2166 | |||||||
2167 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
2168 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
2169 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
2170 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
2171 | |||||||
2172 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
2173 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
2174 | &xmm_src_lo, &xmm_src_hi); | ||||||
2175 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | ||||||
2176 | &xmm_alpha_src_lo, &xmm_alpha_src_hi, | ||||||
2177 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
2178 | |||||||
2179 | negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, | ||||||
2180 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
2181 | |||||||
2182 | pix_add_multiply_2x128 ( | ||||||
2183 | &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, | ||||||
2184 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | ||||||
2185 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
2186 | |||||||
2187 | save_128_aligned ( | ||||||
2188 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
2189 | |||||||
2190 | ps += 4; | ||||||
2191 | pd += 4; | ||||||
2192 | pm += 4; | ||||||
2193 | w -= 4; | ||||||
2194 | } | ||||||
2195 | |||||||
2196 | while (w) | ||||||
2197 | { | ||||||
2198 | s = *ps++; | ||||||
2199 | m = *pm++; | ||||||
2200 | d = *pd; | ||||||
2201 | |||||||
2202 | *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d); | ||||||
2203 | w--; | ||||||
2204 | } | ||||||
2205 | } | ||||||
2206 | |||||||
/*
 * Single-pixel helper used by sse2_combine_xor_ca.  After unpacking
 * (a = mask, s = src, d = dst) it builds three terms:
 *   alpha_dst = negate (a * expand_alpha (s))  - the factor paired with d
 *   dest      = s * a                          - the masked source
 *   alpha_src = negate (expand_alpha (d))      - the factor paired with dest
 * Note the local names describe which operand each factor is applied to,
 * not which pixel the alpha was read from.  The return value is the repacked
 * result of pix_add_multiply_1x128 (&d, &alpha_dst, &dest, &alpha_src) -
 * presumably d * alpha_dst + dest * alpha_src per channel; confirm against
 * pix_add_multiply_1x128.
 *
 * NOTE(review): "force_inline__inline__ __attribute__ (...)" looks like the
 * force_inline macro name printed together with its expansion - confirm
 * against the real source before compiling this text.
 */
2207 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t | ||||||
2208 | core_combine_xor_ca_pixel_sse2 (uint32_t src, | ||||||
2209 | uint32_t mask, | ||||||
2210 | uint32_t dst) | ||||||
2211 | { | ||||||
2212 | __m128i a = unpack_32_1x128 (mask); | ||||||
2213 | __m128i s = unpack_32_1x128 (src); | ||||||
2214 | __m128i d = unpack_32_1x128 (dst); | ||||||
2215 | |||||||
2216 | __m128i alpha_dst = negate_1x128 (pix_multiply_1x128 ( | ||||||
2217 | a, expand_alpha_1x128 (s))); | ||||||
2218 | __m128i dest = pix_multiply_1x128 (s, a); | ||||||
2219 | __m128i alpha_src = negate_1x128 (expand_alpha_1x128 (d)); | ||||||
2220 | |||||||
2221 | return pack_1x128_32 (pix_add_multiply_1x128 (&d, | ||||||
2222 | &alpha_dst, | ||||||
2223 | &dest, | ||||||
2224 | &alpha_src)); | ||||||
2225 | } | ||||||
2226 | |||||||
/*
 * sse2_combine_xor_ca: combines w source/mask pixels into pd.  Scalar
 * pixels go through core_combine_xor_ca_pixel_sse2 (s, m, d); the vector
 * loop performs the same sequence of steps on four pixels at once.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop both
 * alphas are expanded before xmm_src_* and xmm_mask_* are overwritten:
 * src becomes src * mask, mask becomes mask * source alpha, then BOTH the
 * destination alpha and that mask term are negated before
 * pix_add_multiply_2x128 pairs dst with the mask term and src with the
 * destination-alpha term, leaving the result in xmm_dst_lo/xmm_dst_hi.
 */
2227 | static void | ||||||
2228 | sse2_combine_xor_ca (pixman_implementation_t *imp, | ||||||
2229 | pixman_op_t op, | ||||||
2230 | uint32_t * pd, | ||||||
2231 | const uint32_t * ps, | ||||||
2232 | const uint32_t * pm, | ||||||
2233 | int w) | ||||||
2234 | { | ||||||
2235 | uint32_t s, m, d; | ||||||
2236 | |||||||
2237 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
2238 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
2239 | __m128i xmm_alpha_src_lo, xmm_alpha_src_hi; | ||||||
2240 | __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi; | ||||||
2241 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
2242 | |||||||
2243 | while (w && (uintptr_t)pd & 15) | ||||||
2244 | { | ||||||
2245 | s = *ps++; | ||||||
2246 | m = *pm++; | ||||||
2247 | d = *pd; | ||||||
2248 | |||||||
2249 | *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); | ||||||
2250 | w--; | ||||||
2251 | } | ||||||
2252 | |||||||
2253 | while (w >= 4) | ||||||
2254 | { | ||||||
2255 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
2256 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
2257 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
2258 | |||||||
2259 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
2260 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
2261 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
2262 | |||||||
2263 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
2264 | &xmm_alpha_src_lo, &xmm_alpha_src_hi); | ||||||
2265 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, | ||||||
2266 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
2267 | |||||||
2268 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
2269 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
2270 | &xmm_src_lo, &xmm_src_hi); | ||||||
2271 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | ||||||
2272 | &xmm_alpha_src_lo, &xmm_alpha_src_hi, | ||||||
2273 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
2274 | |||||||
2275 | negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, | ||||||
2276 | &xmm_alpha_dst_lo, &xmm_alpha_dst_hi); | ||||||
2277 | negate_2x128 (xmm_mask_lo, xmm_mask_hi, | ||||||
2278 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
2279 | |||||||
2280 | pix_add_multiply_2x128 ( | ||||||
2281 | &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, | ||||||
2282 | &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi, | ||||||
2283 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
2284 | |||||||
2285 | save_128_aligned ( | ||||||
2286 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
2287 | |||||||
2288 | ps += 4; | ||||||
2289 | pd += 4; | ||||||
2290 | pm += 4; | ||||||
2291 | w -= 4; | ||||||
2292 | } | ||||||
2293 | |||||||
2294 | while (w) | ||||||
2295 | { | ||||||
2296 | s = *ps++; | ||||||
2297 | m = *pm++; | ||||||
2298 | d = *pd; | ||||||
2299 | |||||||
2300 | *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d); | ||||||
2301 | w--; | ||||||
2302 | } | ||||||
2303 | } | ||||||
2304 | |||||||
/*
 * sse2_combine_add_ca: for each of the w pixels, replaces the destination
 * with _mm_adds_epu8 (pix_multiply (s, m), d), computed on unpacked values
 * and repacked before the store.  _mm_adds_epu8 is the SSE2 saturating add
 * of unsigned 8-bit lanes.
 *
 * pd: destination pixels (read and rewritten in place)
 * ps: source pixels
 * pm: mask pixels; dereferenced unconditionally, so it must not be NULL
 * w:  number of pixels to process
 * imp and op are not referenced in the body.
 *
 * Three phases: one pixel at a time until pd is 16-byte aligned, then four
 * pixels per iteration (aligned load/store on pd, unaligned loads from
 * ps/pm), then the leftover pixels one at a time.  In the vector loop
 * xmm_src_* is overwritten with src * mask and the two halves are added to
 * the destination halves separately before being packed back together.
 */
2305 | static void | ||||||
2306 | sse2_combine_add_ca (pixman_implementation_t *imp, | ||||||
2307 | pixman_op_t op, | ||||||
2308 | uint32_t * pd, | ||||||
2309 | const uint32_t * ps, | ||||||
2310 | const uint32_t * pm, | ||||||
2311 | int w) | ||||||
2312 | { | ||||||
2313 | uint32_t s, m, d; | ||||||
2314 | |||||||
2315 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
2316 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
2317 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
2318 | |||||||
2319 | while (w && (uintptr_t)pd & 15) | ||||||
2320 | { | ||||||
2321 | s = *ps++; | ||||||
2322 | m = *pm++; | ||||||
2323 | d = *pd; | ||||||
2324 | |||||||
2325 | *pd++ = pack_1x128_32 ( | ||||||
2326 | _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), | ||||||
2327 | unpack_32_1x128 (m)), | ||||||
2328 | unpack_32_1x128 (d))); | ||||||
2329 | w--; | ||||||
2330 | } | ||||||
2331 | |||||||
2332 | while (w >= 4) | ||||||
2333 | { | ||||||
2334 | xmm_src_hi = load_128_unaligned ((__m128i*)ps); | ||||||
2335 | xmm_mask_hi = load_128_unaligned ((__m128i*)pm); | ||||||
2336 | xmm_dst_hi = load_128_aligned ((__m128i*)pd); | ||||||
2337 | |||||||
2338 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
2339 | unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
2340 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
2341 | |||||||
2342 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
2343 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
2344 | &xmm_src_lo, &xmm_src_hi); | ||||||
2345 | |||||||
2346 | save_128_aligned ( | ||||||
2347 | (__m128i*)pd, pack_2x128_128 ( | ||||||
2348 | _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo), | ||||||
2349 | _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi))); | ||||||
2350 | |||||||
2351 | ps += 4; | ||||||
2352 | pd += 4; | ||||||
2353 | pm += 4; | ||||||
2354 | w -= 4; | ||||||
2355 | } | ||||||
2356 | |||||||
2357 | while (w) | ||||||
2358 | { | ||||||
2359 | s = *ps++; | ||||||
2360 | m = *pm++; | ||||||
2361 | d = *pd; | ||||||
2362 | |||||||
2363 | *pd++ = pack_1x128_32 ( | ||||||
2364 | _mm_adds_epu8 (pix_multiply_1x128 (unpack_32_1x128 (s), | ||||||
2365 | unpack_32_1x128 (m)), | ||||||
2366 | unpack_32_1x128 (d))); | ||||||
2367 | w--; | ||||||
2368 | } | ||||||
2369 | } | ||||||
2370 | |||||||
/*
 * Returns a 128-bit vector with the 16-bit value `mask` replicated into
 * every 16-bit lane (a thin wrapper around _mm_set1_epi16).
 *
 * NOTE(review): "force_inline__inline__ __attribute__ (...)" looks like the
 * force_inline macro name printed together with its expansion - confirm
 * against the real source before compiling this text.
 */
2371 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
2372 | create_mask_16_128 (uint16_t mask) | ||||||
2373 | { | ||||||
2374 | return _mm_set1_epi16 (mask); | ||||||
2375 | } | ||||||
2376 | |||||||
/*
 * create_mask_2x32_128 (mask0, mask1): builds a 128-bit vector from two
 * 32-bit values via _mm_set_epi32 (mask0, mask1, mask0, mask1).
 * _mm_set_epi32 takes its arguments from the highest 32-bit element to the
 * lowest, so mask1 lands in elements 0 and 2 and mask0 in elements 1 and 3.
 *
 * When __SUNPRO_C is defined and >= 0x590 the same expression is provided as
 * a macro instead of an inline function.  Callers should avoid arguments
 * with side effects, because the macro form evaluates each one twice.
 *
 * NOTE(review): "force_inline__inline__ __attribute__ (...)" looks like the
 * force_inline macro name printed together with its expansion - confirm
 * against the real source before compiling this text.
 */
2377 | /* Work around a code generation bug in Sun Studio 12. */ | ||||||
2378 | #if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590) | ||||||
2379 | # define create_mask_2x32_128(mask0, mask1) \ | ||||||
2380 | (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1))) | ||||||
2381 | #else | ||||||
2382 | static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i | ||||||
2383 | create_mask_2x32_128 (uint32_t mask0, | ||||||
2384 | uint32_t mask1) | ||||||
2385 | { | ||||||
2386 | return _mm_set_epi32 (mask0, mask1, mask0, mask1); | ||||||
2387 | } | ||||||
2388 | #endif | ||||||
2389 | |||||||
2390 | static void | ||||||
2391 | sse2_composite_over_n_8888 (pixman_implementation_t *imp, | ||||||
2392 | pixman_composite_info_t *info) | ||||||
2393 | { | ||||||
2394 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2395 | uint32_t src; | ||||||
2396 | uint32_t *dst_line, *dst, d; | ||||||
2397 | int32_t w; | ||||||
2398 | int dst_stride; | ||||||
2399 | __m128i xmm_src, xmm_alpha; | ||||||
2400 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
2401 | |||||||
2402 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
2403 | |||||||
2404 | if (src == 0) | ||||||
2405 | return; | ||||||
2406 | |||||||
2407 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
2408 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2409 | |||||||
2410 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
2411 | xmm_alpha = expand_alpha_1x128 (xmm_src); | ||||||
2412 | |||||||
2413 | while (height--) | ||||||
2414 | { | ||||||
2415 | dst = dst_line; | ||||||
2416 | |||||||
2417 | dst_line += dst_stride; | ||||||
2418 | w = width; | ||||||
2419 | |||||||
2420 | while (w && (uintptr_t)dst & 15) | ||||||
2421 | { | ||||||
2422 | d = *dst; | ||||||
2423 | *dst++ = pack_1x128_32 (over_1x128 (xmm_src, | ||||||
2424 | xmm_alpha, | ||||||
2425 | unpack_32_1x128 (d))); | ||||||
2426 | w--; | ||||||
2427 | } | ||||||
2428 | |||||||
2429 | while (w >= 4) | ||||||
2430 | { | ||||||
2431 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
2432 | |||||||
2433 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
2434 | |||||||
2435 | over_2x128 (&xmm_src, &xmm_src, | ||||||
2436 | &xmm_alpha, &xmm_alpha, | ||||||
2437 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
2438 | |||||||
2439 | /* rebuid the 4 pixel data and save*/ | ||||||
2440 | save_128_aligned ( | ||||||
2441 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
2442 | |||||||
2443 | w -= 4; | ||||||
2444 | dst += 4; | ||||||
2445 | } | ||||||
2446 | |||||||
2447 | while (w) | ||||||
2448 | { | ||||||
2449 | d = *dst; | ||||||
2450 | *dst++ = pack_1x128_32 (over_1x128 (xmm_src, | ||||||
2451 | xmm_alpha, | ||||||
2452 | unpack_32_1x128 (d))); | ||||||
2453 | w--; | ||||||
2454 | } | ||||||
2455 | |||||||
2456 | } | ||||||
2457 | } | ||||||
2458 | |||||||
2459 | static void | ||||||
2460 | sse2_composite_over_n_0565 (pixman_implementation_t *imp, | ||||||
2461 | pixman_composite_info_t *info) | ||||||
2462 | { | ||||||
2463 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2464 | uint32_t src; | ||||||
2465 | uint16_t *dst_line, *dst, d; | ||||||
2466 | int32_t w; | ||||||
2467 | int dst_stride; | ||||||
2468 | __m128i xmm_src, xmm_alpha; | ||||||
2469 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | ||||||
2470 | |||||||
2471 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
2472 | |||||||
2473 | if (src == 0) | ||||||
2474 | return; | ||||||
2475 | |||||||
2476 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
2477 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2478 | |||||||
2479 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
2480 | xmm_alpha = expand_alpha_1x128 (xmm_src); | ||||||
2481 | |||||||
2482 | while (height--) | ||||||
2483 | { | ||||||
2484 | dst = dst_line; | ||||||
2485 | |||||||
2486 | dst_line += dst_stride; | ||||||
2487 | w = width; | ||||||
2488 | |||||||
2489 | while (w && (uintptr_t)dst & 15) | ||||||
2490 | { | ||||||
2491 | d = *dst; | ||||||
2492 | |||||||
2493 | *dst++ = pack_565_32_16 ( | ||||||
2494 | pack_1x128_32 (over_1x128 (xmm_src, | ||||||
2495 | xmm_alpha, | ||||||
2496 | expand565_16_1x128 (d)))); | ||||||
2497 | w--; | ||||||
2498 | } | ||||||
2499 | |||||||
2500 | while (w >= 8) | ||||||
2501 | { | ||||||
2502 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
2503 | |||||||
2504 | unpack_565_128_4x128 (xmm_dst, | ||||||
2505 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | ||||||
2506 | |||||||
2507 | over_2x128 (&xmm_src, &xmm_src, | ||||||
2508 | &xmm_alpha, &xmm_alpha, | ||||||
2509 | &xmm_dst0, &xmm_dst1); | ||||||
2510 | over_2x128 (&xmm_src, &xmm_src, | ||||||
2511 | &xmm_alpha, &xmm_alpha, | ||||||
2512 | &xmm_dst2, &xmm_dst3); | ||||||
2513 | |||||||
2514 | xmm_dst = pack_565_4x128_128 ( | ||||||
2515 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | ||||||
2516 | |||||||
2517 | save_128_aligned ((__m128i*)dst, xmm_dst); | ||||||
2518 | |||||||
2519 | dst += 8; | ||||||
2520 | w -= 8; | ||||||
2521 | } | ||||||
2522 | |||||||
2523 | while (w--) | ||||||
2524 | { | ||||||
2525 | d = *dst; | ||||||
2526 | *dst++ = pack_565_32_16 ( | ||||||
2527 | pack_1x128_32 (over_1x128 (xmm_src, xmm_alpha, | ||||||
2528 | expand565_16_1x128 (d)))); | ||||||
2529 | } | ||||||
2530 | } | ||||||
2531 | |||||||
2532 | } | ||||||
2533 | |||||||
2534 | static void | ||||||
2535 | sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp, | ||||||
2536 | pixman_composite_info_t *info) | ||||||
2537 | { | ||||||
2538 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2539 | uint32_t src; | ||||||
2540 | uint32_t *dst_line, d; | ||||||
2541 | uint32_t *mask_line, m; | ||||||
2542 | uint32_t pack_cmp; | ||||||
2543 | int dst_stride, mask_stride; | ||||||
2544 | |||||||
2545 | __m128i xmm_src; | ||||||
2546 | __m128i xmm_dst; | ||||||
2547 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
2548 | |||||||
2549 | __m128i mmx_src, mmx_mask, mmx_dest; | ||||||
2550 | |||||||
2551 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
2552 | |||||||
2553 | if (src == 0) | ||||||
2554 | return; | ||||||
2555 | |||||||
2556 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
2557 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2558 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
2559 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
2560 | |||||||
2561 | xmm_src = _mm_unpacklo_epi8 ( | ||||||
2562 | create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); | ||||||
2563 | mmx_src = xmm_src; | ||||||
2564 | |||||||
2565 | while (height--) | ||||||
2566 | { | ||||||
2567 | int w = width; | ||||||
2568 | const uint32_t *pm = (uint32_t *)mask_line; | ||||||
2569 | uint32_t *pd = (uint32_t *)dst_line; | ||||||
2570 | |||||||
2571 | dst_line += dst_stride; | ||||||
2572 | mask_line += mask_stride; | ||||||
2573 | |||||||
2574 | while (w && (uintptr_t)pd & 15) | ||||||
2575 | { | ||||||
2576 | m = *pm++; | ||||||
2577 | |||||||
2578 | if (m) | ||||||
2579 | { | ||||||
2580 | d = *pd; | ||||||
2581 | |||||||
2582 | mmx_mask = unpack_32_1x128 (m); | ||||||
2583 | mmx_dest = unpack_32_1x128 (d); | ||||||
2584 | |||||||
2585 | *pd = pack_1x128_32 ( | ||||||
2586 | _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), | ||||||
2587 | mmx_dest)); | ||||||
2588 | } | ||||||
2589 | |||||||
2590 | pd++; | ||||||
2591 | w--; | ||||||
2592 | } | ||||||
2593 | |||||||
2594 | while (w >= 4) | ||||||
2595 | { | ||||||
2596 | xmm_mask = load_128_unaligned ((__m128i*)pm); | ||||||
2597 | |||||||
2598 | pack_cmp = | ||||||
2599 | _mm_movemask_epi8 ( | ||||||
2600 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | ||||||
2601 | |||||||
2602 | /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ | ||||||
2603 | if (pack_cmp != 0xffff) | ||||||
2604 | { | ||||||
2605 | xmm_dst = load_128_aligned ((__m128i*)pd); | ||||||
2606 | |||||||
2607 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
2608 | |||||||
2609 | pix_multiply_2x128 (&xmm_src, &xmm_src, | ||||||
2610 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
2611 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
2612 | xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi); | ||||||
2613 | |||||||
2614 | save_128_aligned ( | ||||||
2615 | (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst)); | ||||||
2616 | } | ||||||
2617 | |||||||
2618 | pd += 4; | ||||||
2619 | pm += 4; | ||||||
2620 | w -= 4; | ||||||
2621 | } | ||||||
2622 | |||||||
2623 | while (w) | ||||||
2624 | { | ||||||
2625 | m = *pm++; | ||||||
2626 | |||||||
2627 | if (m) | ||||||
2628 | { | ||||||
2629 | d = *pd; | ||||||
2630 | |||||||
2631 | mmx_mask = unpack_32_1x128 (m); | ||||||
2632 | mmx_dest = unpack_32_1x128 (d); | ||||||
2633 | |||||||
2634 | *pd = pack_1x128_32 ( | ||||||
2635 | _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), | ||||||
2636 | mmx_dest)); | ||||||
2637 | } | ||||||
2638 | |||||||
2639 | pd++; | ||||||
2640 | w--; | ||||||
2641 | } | ||||||
2642 | } | ||||||
2643 | |||||||
2644 | } | ||||||
2645 | |||||||
2646 | static void | ||||||
2647 | sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp, | ||||||
2648 | pixman_composite_info_t *info) | ||||||
2649 | { | ||||||
2650 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2651 | uint32_t src; | ||||||
2652 | uint32_t *dst_line, d; | ||||||
2653 | uint32_t *mask_line, m; | ||||||
2654 | uint32_t pack_cmp; | ||||||
2655 | int dst_stride, mask_stride; | ||||||
2656 | |||||||
2657 | __m128i xmm_src, xmm_alpha; | ||||||
2658 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
2659 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
2660 | |||||||
2661 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | ||||||
2662 | |||||||
2663 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
2664 | |||||||
2665 | if (src == 0) | ||||||
2666 | return; | ||||||
2667 | |||||||
2668 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
2669 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2670 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
2671 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
2672 | |||||||
2673 | xmm_src = _mm_unpacklo_epi8 ( | ||||||
2674 | create_mask_2x32_128 (src, src), _mm_setzero_si128 ()); | ||||||
2675 | xmm_alpha = expand_alpha_1x128 (xmm_src); | ||||||
2676 | mmx_src = xmm_src; | ||||||
2677 | mmx_alpha = xmm_alpha; | ||||||
2678 | |||||||
2679 | while (height--) | ||||||
2680 | { | ||||||
2681 | int w = width; | ||||||
2682 | const uint32_t *pm = (uint32_t *)mask_line; | ||||||
2683 | uint32_t *pd = (uint32_t *)dst_line; | ||||||
2684 | |||||||
2685 | dst_line += dst_stride; | ||||||
2686 | mask_line += mask_stride; | ||||||
2687 | |||||||
2688 | while (w && (uintptr_t)pd & 15) | ||||||
2689 | { | ||||||
2690 | m = *pm++; | ||||||
2691 | |||||||
2692 | if (m) | ||||||
2693 | { | ||||||
2694 | d = *pd; | ||||||
2695 | mmx_mask = unpack_32_1x128 (m); | ||||||
2696 | mmx_dest = unpack_32_1x128 (d); | ||||||
2697 | |||||||
2698 | *pd = pack_1x128_32 (in_over_1x128 (&mmx_src, | ||||||
2699 | &mmx_alpha, | ||||||
2700 | &mmx_mask, | ||||||
2701 | &mmx_dest)); | ||||||
2702 | } | ||||||
2703 | |||||||
2704 | pd++; | ||||||
2705 | w--; | ||||||
2706 | } | ||||||
2707 | |||||||
2708 | while (w >= 4) | ||||||
2709 | { | ||||||
2710 | xmm_mask = load_128_unaligned ((__m128i*)pm); | ||||||
2711 | |||||||
2712 | pack_cmp = | ||||||
2713 | _mm_movemask_epi8 ( | ||||||
2714 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | ||||||
2715 | |||||||
2716 | /* if all bits in mask are zero, pack_cmp are equal to 0xffff */ | ||||||
2717 | if (pack_cmp != 0xffff) | ||||||
2718 | { | ||||||
2719 | xmm_dst = load_128_aligned ((__m128i*)pd); | ||||||
2720 | |||||||
2721 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
2722 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
2723 | |||||||
2724 | in_over_2x128 (&xmm_src, &xmm_src, | ||||||
2725 | &xmm_alpha, &xmm_alpha, | ||||||
2726 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
2727 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
2728 | |||||||
2729 | save_128_aligned ( | ||||||
2730 | (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
2731 | } | ||||||
2732 | |||||||
2733 | pd += 4; | ||||||
2734 | pm += 4; | ||||||
2735 | w -= 4; | ||||||
2736 | } | ||||||
2737 | |||||||
2738 | while (w) | ||||||
2739 | { | ||||||
2740 | m = *pm++; | ||||||
2741 | |||||||
2742 | if (m) | ||||||
2743 | { | ||||||
2744 | d = *pd; | ||||||
2745 | mmx_mask = unpack_32_1x128 (m); | ||||||
2746 | mmx_dest = unpack_32_1x128 (d); | ||||||
2747 | |||||||
2748 | *pd = pack_1x128_32 ( | ||||||
2749 | in_over_1x128 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)); | ||||||
2750 | } | ||||||
2751 | |||||||
2752 | pd++; | ||||||
2753 | w--; | ||||||
2754 | } | ||||||
2755 | } | ||||||
2756 | |||||||
2757 | } | ||||||
2758 | |||||||
2759 | static void | ||||||
2760 | sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp, | ||||||
2761 | pixman_composite_info_t *info) | ||||||
2762 | { | ||||||
2763 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2764 | uint32_t *dst_line, *dst; | ||||||
2765 | uint32_t *src_line, *src; | ||||||
2766 | uint32_t mask; | ||||||
2767 | int32_t w; | ||||||
2768 | int dst_stride, src_stride; | ||||||
2769 | |||||||
2770 | __m128i xmm_mask; | ||||||
2771 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
2772 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
2773 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
2774 | |||||||
2775 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
2776 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2777 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
2778 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
2779 | |||||||
2780 | mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); | ||||||
2781 | |||||||
2782 | xmm_mask = create_mask_16_128 (mask >> 24); | ||||||
2783 | |||||||
2784 | while (height--) | ||||||
2785 | { | ||||||
2786 | dst = dst_line; | ||||||
2787 | dst_line += dst_stride; | ||||||
2788 | src = src_line; | ||||||
2789 | src_line += src_stride; | ||||||
2790 | w = width; | ||||||
2791 | |||||||
2792 | while (w && (uintptr_t)dst & 15) | ||||||
2793 | { | ||||||
2794 | uint32_t s = *src++; | ||||||
2795 | |||||||
2796 | if (s) | ||||||
2797 | { | ||||||
2798 | uint32_t d = *dst; | ||||||
2799 | |||||||
2800 | __m128i ms = unpack_32_1x128 (s); | ||||||
2801 | __m128i alpha = expand_alpha_1x128 (ms); | ||||||
2802 | __m128i dest = xmm_mask; | ||||||
2803 | __m128i alpha_dst = unpack_32_1x128 (d); | ||||||
2804 | |||||||
2805 | *dst = pack_1x128_32 ( | ||||||
2806 | in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | ||||||
2807 | } | ||||||
2808 | dst++; | ||||||
2809 | w--; | ||||||
2810 | } | ||||||
2811 | |||||||
2812 | while (w >= 4) | ||||||
2813 | { | ||||||
2814 | xmm_src = load_128_unaligned ((__m128i*)src); | ||||||
2815 | |||||||
2816 | if (!is_zero (xmm_src)) | ||||||
2817 | { | ||||||
2818 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
2819 | |||||||
2820 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
2821 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
2822 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
2823 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
2824 | |||||||
2825 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
2826 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
2827 | &xmm_mask, &xmm_mask, | ||||||
2828 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
2829 | |||||||
2830 | save_128_aligned ( | ||||||
2831 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
2832 | } | ||||||
2833 | |||||||
2834 | dst += 4; | ||||||
2835 | src += 4; | ||||||
2836 | w -= 4; | ||||||
2837 | } | ||||||
2838 | |||||||
2839 | while (w) | ||||||
2840 | { | ||||||
2841 | uint32_t s = *src++; | ||||||
2842 | |||||||
2843 | if (s) | ||||||
2844 | { | ||||||
2845 | uint32_t d = *dst; | ||||||
2846 | |||||||
2847 | __m128i ms = unpack_32_1x128 (s); | ||||||
2848 | __m128i alpha = expand_alpha_1x128 (ms); | ||||||
2849 | __m128i mask = xmm_mask; | ||||||
2850 | __m128i dest = unpack_32_1x128 (d); | ||||||
2851 | |||||||
2852 | *dst = pack_1x128_32 ( | ||||||
2853 | in_over_1x128 (&ms, &alpha, &mask, &dest)); | ||||||
2854 | } | ||||||
2855 | |||||||
2856 | dst++; | ||||||
2857 | w--; | ||||||
2858 | } | ||||||
2859 | } | ||||||
2860 | |||||||
2861 | } | ||||||
2862 | |||||||
2863 | static void | ||||||
2864 | sse2_composite_src_x888_0565 (pixman_implementation_t *imp, | ||||||
2865 | pixman_composite_info_t *info) | ||||||
2866 | { | ||||||
2867 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2868 | uint16_t *dst_line, *dst; | ||||||
2869 | uint32_t *src_line, *src, s; | ||||||
2870 | int dst_stride, src_stride; | ||||||
2871 | int32_t w; | ||||||
2872 | |||||||
2873 | PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
2874 | PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2875 | |||||||
2876 | while (height--) | ||||||
2877 | { | ||||||
2878 | dst = dst_line; | ||||||
2879 | dst_line += dst_stride; | ||||||
2880 | src = src_line; | ||||||
2881 | src_line += src_stride; | ||||||
2882 | w = width; | ||||||
2883 | |||||||
2884 | while (w && (uintptr_t)dst & 15) | ||||||
2885 | { | ||||||
2886 | s = *src++; | ||||||
2887 | *dst = convert_8888_to_0565 (s); | ||||||
2888 | dst++; | ||||||
2889 | w--; | ||||||
2890 | } | ||||||
2891 | |||||||
2892 | while (w >= 8) | ||||||
2893 | { | ||||||
2894 | __m128i xmm_src0 = load_128_unaligned ((__m128i *)src + 0); | ||||||
2895 | __m128i xmm_src1 = load_128_unaligned ((__m128i *)src + 1); | ||||||
2896 | |||||||
2897 | save_128_aligned ((__m128i*)dst, pack_565_2packedx128_128 (xmm_src0, xmm_src1)); | ||||||
2898 | |||||||
2899 | w -= 8; | ||||||
2900 | src += 8; | ||||||
2901 | dst += 8; | ||||||
2902 | } | ||||||
2903 | |||||||
2904 | while (w) | ||||||
2905 | { | ||||||
2906 | s = *src++; | ||||||
2907 | *dst = convert_8888_to_0565 (s); | ||||||
2908 | dst++; | ||||||
2909 | w--; | ||||||
2910 | } | ||||||
2911 | } | ||||||
2912 | } | ||||||
2913 | |||||||
2914 | static void | ||||||
2915 | sse2_composite_src_x888_8888 (pixman_implementation_t *imp, | ||||||
2916 | pixman_composite_info_t *info) | ||||||
2917 | { | ||||||
2918 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2919 | uint32_t *dst_line, *dst; | ||||||
2920 | uint32_t *src_line, *src; | ||||||
2921 | int32_t w; | ||||||
2922 | int dst_stride, src_stride; | ||||||
2923 | |||||||
2924 | |||||||
2925 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
2926 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2927 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
2928 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
2929 | |||||||
2930 | while (height--) | ||||||
2931 | { | ||||||
2932 | dst = dst_line; | ||||||
2933 | dst_line += dst_stride; | ||||||
2934 | src = src_line; | ||||||
2935 | src_line += src_stride; | ||||||
2936 | w = width; | ||||||
2937 | |||||||
2938 | while (w && (uintptr_t)dst & 15) | ||||||
2939 | { | ||||||
2940 | *dst++ = *src++ | 0xff000000; | ||||||
2941 | w--; | ||||||
2942 | } | ||||||
2943 | |||||||
2944 | while (w >= 16) | ||||||
2945 | { | ||||||
2946 | __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4; | ||||||
2947 | |||||||
2948 | xmm_src1 = load_128_unaligned ((__m128i*)src + 0); | ||||||
2949 | xmm_src2 = load_128_unaligned ((__m128i*)src + 1); | ||||||
2950 | xmm_src3 = load_128_unaligned ((__m128i*)src + 2); | ||||||
2951 | xmm_src4 = load_128_unaligned ((__m128i*)src + 3); | ||||||
2952 | |||||||
2953 | save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000)); | ||||||
2954 | save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000)); | ||||||
2955 | save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000)); | ||||||
2956 | save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000)); | ||||||
2957 | |||||||
2958 | dst += 16; | ||||||
2959 | src += 16; | ||||||
2960 | w -= 16; | ||||||
2961 | } | ||||||
2962 | |||||||
2963 | while (w) | ||||||
2964 | { | ||||||
2965 | *dst++ = *src++ | 0xff000000; | ||||||
2966 | w--; | ||||||
2967 | } | ||||||
2968 | } | ||||||
2969 | |||||||
2970 | } | ||||||
2971 | |||||||
2972 | static void | ||||||
2973 | sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp, | ||||||
2974 | pixman_composite_info_t *info) | ||||||
2975 | { | ||||||
2976 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
2977 | uint32_t *dst_line, *dst; | ||||||
2978 | uint32_t *src_line, *src; | ||||||
2979 | uint32_t mask; | ||||||
2980 | int dst_stride, src_stride; | ||||||
2981 | int32_t w; | ||||||
2982 | |||||||
2983 | __m128i xmm_mask, xmm_alpha; | ||||||
2984 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
2985 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
2986 | |||||||
2987 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
2988 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
2989 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
2990 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
2991 | |||||||
2992 | mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8); | ||||||
2993 | |||||||
2994 | xmm_mask = create_mask_16_128 (mask >> 24); | ||||||
2995 | xmm_alpha = mask_00ff; | ||||||
2996 | |||||||
2997 | while (height--) | ||||||
2998 | { | ||||||
2999 | dst = dst_line; | ||||||
3000 | dst_line += dst_stride; | ||||||
3001 | src = src_line; | ||||||
3002 | src_line += src_stride; | ||||||
3003 | w = width; | ||||||
3004 | |||||||
3005 | while (w && (uintptr_t)dst & 15) | ||||||
3006 | { | ||||||
3007 | uint32_t s = (*src++) | 0xff000000; | ||||||
3008 | uint32_t d = *dst; | ||||||
3009 | |||||||
3010 | __m128i src = unpack_32_1x128 (s); | ||||||
3011 | __m128i alpha = xmm_alpha; | ||||||
3012 | __m128i mask = xmm_mask; | ||||||
3013 | __m128i dest = unpack_32_1x128 (d); | ||||||
3014 | |||||||
3015 | *dst++ = pack_1x128_32 ( | ||||||
3016 | in_over_1x128 (&src, &alpha, &mask, &dest)); | ||||||
3017 | |||||||
3018 | w--; | ||||||
3019 | } | ||||||
3020 | |||||||
3021 | while (w >= 4) | ||||||
3022 | { | ||||||
3023 | xmm_src = _mm_or_si128 ( | ||||||
3024 | load_128_unaligned ((__m128i*)src), mask_ff000000); | ||||||
3025 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
3026 | |||||||
3027 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
3028 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
3029 | |||||||
3030 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
3031 | &xmm_alpha, &xmm_alpha, | ||||||
3032 | &xmm_mask, &xmm_mask, | ||||||
3033 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
3034 | |||||||
3035 | save_128_aligned ( | ||||||
3036 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
3037 | |||||||
3038 | dst += 4; | ||||||
3039 | src += 4; | ||||||
3040 | w -= 4; | ||||||
3041 | |||||||
3042 | } | ||||||
3043 | |||||||
3044 | while (w) | ||||||
3045 | { | ||||||
3046 | uint32_t s = (*src++) | 0xff000000; | ||||||
3047 | uint32_t d = *dst; | ||||||
3048 | |||||||
3049 | __m128i src = unpack_32_1x128 (s); | ||||||
3050 | __m128i alpha = xmm_alpha; | ||||||
3051 | __m128i mask = xmm_mask; | ||||||
3052 | __m128i dest = unpack_32_1x128 (d); | ||||||
3053 | |||||||
3054 | *dst++ = pack_1x128_32 ( | ||||||
3055 | in_over_1x128 (&src, &alpha, &mask, &dest)); | ||||||
3056 | |||||||
3057 | w--; | ||||||
3058 | } | ||||||
3059 | } | ||||||
3060 | |||||||
3061 | } | ||||||
3062 | |||||||
3063 | static void | ||||||
3064 | sse2_composite_over_8888_8888 (pixman_implementation_t *imp, | ||||||
3065 | pixman_composite_info_t *info) | ||||||
3066 | { | ||||||
3067 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3068 | int dst_stride, src_stride; | ||||||
3069 | uint32_t *dst_line, *dst; | ||||||
3070 | uint32_t *src_line, *src; | ||||||
3071 | |||||||
3072 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3073 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3074 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
3075 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
3076 | |||||||
3077 | dst = dst_line; | ||||||
3078 | src = src_line; | ||||||
3079 | |||||||
3080 | while (height--) | ||||||
3081 | { | ||||||
3082 | sse2_combine_over_u (imp, op, dst, src, NULL((void*)0), width); | ||||||
3083 | |||||||
3084 | dst += dst_stride; | ||||||
3085 | src += src_stride; | ||||||
3086 | } | ||||||
3087 | } | ||||||
3088 | |||||||
3089 | static force_inline__inline__ __attribute__ ((__always_inline__)) uint16_t | ||||||
3090 | composite_over_8888_0565pixel (uint32_t src, uint16_t dst) | ||||||
3091 | { | ||||||
3092 | __m128i ms; | ||||||
3093 | |||||||
3094 | ms = unpack_32_1x128 (src); | ||||||
3095 | return pack_565_32_16 ( | ||||||
3096 | pack_1x128_32 ( | ||||||
3097 | over_1x128 ( | ||||||
3098 | ms, expand_alpha_1x128 (ms), expand565_16_1x128 (dst)))); | ||||||
3099 | } | ||||||
3100 | |||||||
3101 | static void | ||||||
3102 | sse2_composite_over_8888_0565 (pixman_implementation_t *imp, | ||||||
3103 | pixman_composite_info_t *info) | ||||||
3104 | { | ||||||
3105 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3106 | uint16_t *dst_line, *dst, d; | ||||||
3107 | uint32_t *src_line, *src, s; | ||||||
3108 | int dst_stride, src_stride; | ||||||
3109 | int32_t w; | ||||||
3110 | |||||||
3111 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
3112 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
3113 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | ||||||
3114 | |||||||
3115 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3116 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3117 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
3118 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
3119 | |||||||
3120 | while (height--) | ||||||
3121 | { | ||||||
3122 | dst = dst_line; | ||||||
3123 | src = src_line; | ||||||
3124 | |||||||
3125 | dst_line += dst_stride; | ||||||
3126 | src_line += src_stride; | ||||||
3127 | w = width; | ||||||
3128 | |||||||
3129 | /* Align dst on a 16-byte boundary */ | ||||||
3130 | while (w && | ||||||
3131 | ((uintptr_t)dst & 15)) | ||||||
3132 | { | ||||||
3133 | s = *src++; | ||||||
3134 | d = *dst; | ||||||
3135 | |||||||
3136 | *dst++ = composite_over_8888_0565pixel (s, d); | ||||||
3137 | w--; | ||||||
3138 | } | ||||||
3139 | |||||||
3140 | /* It's a 8 pixel loop */ | ||||||
3141 | while (w >= 8) | ||||||
3142 | { | ||||||
3143 | /* I'm loading unaligned because I'm not sure | ||||||
3144 | * about the address alignment. | ||||||
3145 | */ | ||||||
3146 | xmm_src = load_128_unaligned ((__m128i*) src); | ||||||
3147 | xmm_dst = load_128_aligned ((__m128i*) dst); | ||||||
3148 | |||||||
3149 | /* Unpacking */ | ||||||
3150 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
3151 | unpack_565_128_4x128 (xmm_dst, | ||||||
3152 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | ||||||
3153 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3154 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
3155 | |||||||
3156 | /* I'm loading next 4 pixels from memory | ||||||
3157 | * before to optimze the memory read. | ||||||
3158 | */ | ||||||
3159 | xmm_src = load_128_unaligned ((__m128i*) (src + 4)); | ||||||
3160 | |||||||
3161 | over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
3162 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
3163 | &xmm_dst0, &xmm_dst1); | ||||||
3164 | |||||||
3165 | /* Unpacking */ | ||||||
3166 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
3167 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3168 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
3169 | |||||||
3170 | over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
3171 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
3172 | &xmm_dst2, &xmm_dst3); | ||||||
3173 | |||||||
3174 | save_128_aligned ( | ||||||
3175 | (__m128i*)dst, pack_565_4x128_128 ( | ||||||
3176 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | ||||||
3177 | |||||||
3178 | w -= 8; | ||||||
3179 | dst += 8; | ||||||
3180 | src += 8; | ||||||
3181 | } | ||||||
3182 | |||||||
3183 | while (w--) | ||||||
3184 | { | ||||||
3185 | s = *src++; | ||||||
3186 | d = *dst; | ||||||
3187 | |||||||
3188 | *dst++ = composite_over_8888_0565pixel (s, d); | ||||||
3189 | } | ||||||
3190 | } | ||||||
3191 | |||||||
3192 | } | ||||||
3193 | |||||||
3194 | static void | ||||||
3195 | sse2_composite_over_n_8_8888 (pixman_implementation_t *imp, | ||||||
3196 | pixman_composite_info_t *info) | ||||||
3197 | { | ||||||
3198 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3199 | uint32_t src, srca; | ||||||
3200 | uint32_t *dst_line, *dst; | ||||||
3201 | uint8_t *mask_line, *mask; | ||||||
3202 | int dst_stride, mask_stride; | ||||||
3203 | int32_t w; | ||||||
3204 | uint32_t m, d; | ||||||
3205 | |||||||
3206 | __m128i xmm_src, xmm_alpha, xmm_def; | ||||||
3207 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
3208 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
3209 | |||||||
3210 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | ||||||
3211 | |||||||
3212 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
3213 | |||||||
3214 | srca = src >> 24; | ||||||
3215 | if (src == 0) | ||||||
3216 | return; | ||||||
3217 | |||||||
3218 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3219 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3220 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
3221 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
3222 | |||||||
3223 | xmm_def = create_mask_2x32_128 (src, src); | ||||||
3224 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
3225 | xmm_alpha = expand_alpha_1x128 (xmm_src); | ||||||
3226 | mmx_src = xmm_src; | ||||||
3227 | mmx_alpha = xmm_alpha; | ||||||
3228 | |||||||
3229 | while (height--) | ||||||
3230 | { | ||||||
3231 | dst = dst_line; | ||||||
3232 | dst_line += dst_stride; | ||||||
3233 | mask = mask_line; | ||||||
3234 | mask_line += mask_stride; | ||||||
3235 | w = width; | ||||||
3236 | |||||||
3237 | while (w && (uintptr_t)dst & 15) | ||||||
3238 | { | ||||||
3239 | uint8_t m = *mask++; | ||||||
3240 | |||||||
3241 | if (m) | ||||||
3242 | { | ||||||
3243 | d = *dst; | ||||||
3244 | mmx_mask = expand_pixel_8_1x128 (m); | ||||||
3245 | mmx_dest = unpack_32_1x128 (d); | ||||||
3246 | |||||||
3247 | *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, | ||||||
3248 | &mmx_alpha, | ||||||
3249 | &mmx_mask, | ||||||
3250 | &mmx_dest)); | ||||||
3251 | } | ||||||
3252 | |||||||
3253 | w--; | ||||||
3254 | dst++; | ||||||
3255 | } | ||||||
3256 | |||||||
3257 | while (w >= 4) | ||||||
3258 | { | ||||||
3259 | m = *((uint32_t*)mask); | ||||||
3260 | |||||||
3261 | if (srca == 0xff && m == 0xffffffff) | ||||||
3262 | { | ||||||
3263 | save_128_aligned ((__m128i*)dst, xmm_def); | ||||||
3264 | } | ||||||
3265 | else if (m) | ||||||
3266 | { | ||||||
3267 | xmm_dst = load_128_aligned ((__m128i*) dst); | ||||||
3268 | xmm_mask = unpack_32_1x128 (m); | ||||||
3269 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | ||||||
3270 | |||||||
3271 | /* Unpacking */ | ||||||
3272 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
3273 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
3274 | |||||||
3275 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | ||||||
3276 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
3277 | |||||||
3278 | in_over_2x128 (&xmm_src, &xmm_src, | ||||||
3279 | &xmm_alpha, &xmm_alpha, | ||||||
3280 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
3281 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
3282 | |||||||
3283 | save_128_aligned ( | ||||||
3284 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
3285 | } | ||||||
3286 | |||||||
3287 | w -= 4; | ||||||
3288 | dst += 4; | ||||||
3289 | mask += 4; | ||||||
3290 | } | ||||||
3291 | |||||||
3292 | while (w) | ||||||
3293 | { | ||||||
3294 | uint8_t m = *mask++; | ||||||
3295 | |||||||
3296 | if (m) | ||||||
3297 | { | ||||||
3298 | d = *dst; | ||||||
3299 | mmx_mask = expand_pixel_8_1x128 (m); | ||||||
3300 | mmx_dest = unpack_32_1x128 (d); | ||||||
3301 | |||||||
3302 | *dst = pack_1x128_32 (in_over_1x128 (&mmx_src, | ||||||
3303 | &mmx_alpha, | ||||||
3304 | &mmx_mask, | ||||||
3305 | &mmx_dest)); | ||||||
3306 | } | ||||||
3307 | |||||||
3308 | w--; | ||||||
3309 | dst++; | ||||||
3310 | } | ||||||
3311 | } | ||||||
3312 | |||||||
3313 | } | ||||||
3314 | |||||||
3315 | #if defined(__GNUC__4) && !defined(__x86_64__1) && !defined(__amd64__1) | ||||||
3316 | __attribute__((__force_align_arg_pointer__)) | ||||||
3317 | #endif | ||||||
3318 | static pixman_bool_t | ||||||
3319 | sse2_fill (pixman_implementation_t *imp, | ||||||
3320 | uint32_t * bits, | ||||||
3321 | int stride, | ||||||
3322 | int bpp, | ||||||
3323 | int x, | ||||||
3324 | int y, | ||||||
3325 | int width, | ||||||
3326 | int height, | ||||||
3327 | uint32_t filler) | ||||||
3328 | { | ||||||
3329 | uint32_t byte_width; | ||||||
3330 | uint8_t *byte_line; | ||||||
3331 | |||||||
3332 | __m128i xmm_def; | ||||||
3333 | |||||||
3334 | if (bpp == 8) | ||||||
3335 | { | ||||||
3336 | uint8_t b; | ||||||
3337 | uint16_t w; | ||||||
3338 | |||||||
3339 | stride = stride * (int) sizeof (uint32_t) / 1; | ||||||
3340 | byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x); | ||||||
3341 | byte_width = width; | ||||||
3342 | stride *= 1; | ||||||
3343 | |||||||
3344 | b = filler & 0xff; | ||||||
3345 | w = (b << 8) | b; | ||||||
3346 | filler = (w << 16) | w; | ||||||
3347 | } | ||||||
3348 | else if (bpp == 16) | ||||||
3349 | { | ||||||
3350 | stride = stride * (int) sizeof (uint32_t) / 2; | ||||||
3351 | byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x); | ||||||
3352 | byte_width = 2 * width; | ||||||
3353 | stride *= 2; | ||||||
3354 | |||||||
3355 | filler = (filler & 0xffff) * 0x00010001; | ||||||
3356 | } | ||||||
3357 | else if (bpp == 32) | ||||||
3358 | { | ||||||
3359 | stride = stride * (int) sizeof (uint32_t) / 4; | ||||||
3360 | byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x); | ||||||
3361 | byte_width = 4 * width; | ||||||
3362 | stride *= 4; | ||||||
3363 | } | ||||||
3364 | else | ||||||
3365 | { | ||||||
3366 | return FALSE0; | ||||||
3367 | } | ||||||
3368 | |||||||
3369 | xmm_def = create_mask_2x32_128 (filler, filler); | ||||||
3370 | |||||||
3371 | while (height--) | ||||||
3372 | { | ||||||
3373 | int w; | ||||||
3374 | uint8_t *d = byte_line; | ||||||
3375 | byte_line += stride; | ||||||
3376 | w = byte_width; | ||||||
3377 | |||||||
3378 | if (w >= 1 && ((uintptr_t)d & 1)) | ||||||
3379 | { | ||||||
3380 | *(uint8_t *)d = filler; | ||||||
3381 | w -= 1; | ||||||
3382 | d += 1; | ||||||
3383 | } | ||||||
3384 | |||||||
3385 | while (w >= 2 && ((uintptr_t)d & 3)) | ||||||
3386 | { | ||||||
3387 | *(uint16_t *)d = filler; | ||||||
3388 | w -= 2; | ||||||
3389 | d += 2; | ||||||
3390 | } | ||||||
3391 | |||||||
3392 | while (w >= 4 && ((uintptr_t)d & 15)) | ||||||
3393 | { | ||||||
3394 | *(uint32_t *)d = filler; | ||||||
3395 | |||||||
3396 | w -= 4; | ||||||
3397 | d += 4; | ||||||
3398 | } | ||||||
3399 | |||||||
3400 | while (w >= 128) | ||||||
3401 | { | ||||||
3402 | save_128_aligned ((__m128i*)(d), xmm_def); | ||||||
3403 | save_128_aligned ((__m128i*)(d + 16), xmm_def); | ||||||
3404 | save_128_aligned ((__m128i*)(d + 32), xmm_def); | ||||||
3405 | save_128_aligned ((__m128i*)(d + 48), xmm_def); | ||||||
3406 | save_128_aligned ((__m128i*)(d + 64), xmm_def); | ||||||
3407 | save_128_aligned ((__m128i*)(d + 80), xmm_def); | ||||||
3408 | save_128_aligned ((__m128i*)(d + 96), xmm_def); | ||||||
3409 | save_128_aligned ((__m128i*)(d + 112), xmm_def); | ||||||
3410 | |||||||
3411 | d += 128; | ||||||
3412 | w -= 128; | ||||||
3413 | } | ||||||
3414 | |||||||
3415 | if (w >= 64) | ||||||
3416 | { | ||||||
3417 | save_128_aligned ((__m128i*)(d), xmm_def); | ||||||
3418 | save_128_aligned ((__m128i*)(d + 16), xmm_def); | ||||||
3419 | save_128_aligned ((__m128i*)(d + 32), xmm_def); | ||||||
3420 | save_128_aligned ((__m128i*)(d + 48), xmm_def); | ||||||
3421 | |||||||
3422 | d += 64; | ||||||
3423 | w -= 64; | ||||||
3424 | } | ||||||
3425 | |||||||
3426 | if (w >= 32) | ||||||
3427 | { | ||||||
3428 | save_128_aligned ((__m128i*)(d), xmm_def); | ||||||
3429 | save_128_aligned ((__m128i*)(d + 16), xmm_def); | ||||||
3430 | |||||||
3431 | d += 32; | ||||||
3432 | w -= 32; | ||||||
3433 | } | ||||||
3434 | |||||||
3435 | if (w >= 16) | ||||||
3436 | { | ||||||
3437 | save_128_aligned ((__m128i*)(d), xmm_def); | ||||||
3438 | |||||||
3439 | d += 16; | ||||||
3440 | w -= 16; | ||||||
3441 | } | ||||||
3442 | |||||||
3443 | while (w >= 4) | ||||||
3444 | { | ||||||
3445 | *(uint32_t *)d = filler; | ||||||
3446 | |||||||
3447 | w -= 4; | ||||||
3448 | d += 4; | ||||||
3449 | } | ||||||
3450 | |||||||
3451 | if (w >= 2) | ||||||
3452 | { | ||||||
3453 | *(uint16_t *)d = filler; | ||||||
3454 | w -= 2; | ||||||
3455 | d += 2; | ||||||
3456 | } | ||||||
3457 | |||||||
3458 | if (w >= 1) | ||||||
3459 | { | ||||||
3460 | *(uint8_t *)d = filler; | ||||||
3461 | w -= 1; | ||||||
3462 | d += 1; | ||||||
3463 | } | ||||||
3464 | } | ||||||
3465 | |||||||
3466 | return TRUE1; | ||||||
3467 | } | ||||||
3468 | |||||||
3469 | static void | ||||||
3470 | sse2_composite_src_n_8_8888 (pixman_implementation_t *imp, | ||||||
3471 | pixman_composite_info_t *info) | ||||||
3472 | { | ||||||
3473 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3474 | uint32_t src, srca; | ||||||
3475 | uint32_t *dst_line, *dst; | ||||||
3476 | uint8_t *mask_line, *mask; | ||||||
3477 | int dst_stride, mask_stride; | ||||||
3478 | int32_t w; | ||||||
3479 | uint32_t m; | ||||||
3480 | |||||||
3481 | __m128i xmm_src, xmm_def; | ||||||
3482 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
3483 | |||||||
3484 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
3485 | |||||||
3486 | srca = src >> 24; | ||||||
3487 | if (src == 0) | ||||||
3488 | { | ||||||
3489 | sse2_fill (imp, dest_image->bits.bits, dest_image->bits.rowstride, | ||||||
3490 | PIXMAN_FORMAT_BPP (dest_image->bits.format)(((dest_image->bits.format) >> 24) ), | ||||||
3491 | dest_x, dest_y, width, height, 0); | ||||||
3492 | return; | ||||||
3493 | } | ||||||
3494 | |||||||
3495 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3496 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3497 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
3498 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
3499 | |||||||
3500 | xmm_def = create_mask_2x32_128 (src, src); | ||||||
3501 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
3502 | |||||||
3503 | while (height--) | ||||||
3504 | { | ||||||
3505 | dst = dst_line; | ||||||
3506 | dst_line += dst_stride; | ||||||
3507 | mask = mask_line; | ||||||
3508 | mask_line += mask_stride; | ||||||
3509 | w = width; | ||||||
3510 | |||||||
3511 | while (w && (uintptr_t)dst & 15) | ||||||
3512 | { | ||||||
3513 | uint8_t m = *mask++; | ||||||
3514 | |||||||
3515 | if (m) | ||||||
3516 | { | ||||||
3517 | *dst = pack_1x128_32 ( | ||||||
3518 | pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m))); | ||||||
3519 | } | ||||||
3520 | else | ||||||
3521 | { | ||||||
3522 | *dst = 0; | ||||||
3523 | } | ||||||
3524 | |||||||
3525 | w--; | ||||||
3526 | dst++; | ||||||
3527 | } | ||||||
3528 | |||||||
3529 | while (w >= 4) | ||||||
3530 | { | ||||||
3531 | m = *((uint32_t*)mask); | ||||||
3532 | |||||||
3533 | if (srca == 0xff && m == 0xffffffff) | ||||||
3534 | { | ||||||
3535 | save_128_aligned ((__m128i*)dst, xmm_def); | ||||||
3536 | } | ||||||
3537 | else if (m) | ||||||
3538 | { | ||||||
3539 | xmm_mask = unpack_32_1x128 (m); | ||||||
3540 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | ||||||
3541 | |||||||
3542 | /* Unpacking */ | ||||||
3543 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
3544 | |||||||
3545 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | ||||||
3546 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
3547 | |||||||
3548 | pix_multiply_2x128 (&xmm_src, &xmm_src, | ||||||
3549 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
3550 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
3551 | |||||||
3552 | save_128_aligned ( | ||||||
3553 | (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi)); | ||||||
3554 | } | ||||||
3555 | else | ||||||
3556 | { | ||||||
3557 | save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ()); | ||||||
3558 | } | ||||||
3559 | |||||||
3560 | w -= 4; | ||||||
3561 | dst += 4; | ||||||
3562 | mask += 4; | ||||||
3563 | } | ||||||
3564 | |||||||
3565 | while (w) | ||||||
3566 | { | ||||||
3567 | uint8_t m = *mask++; | ||||||
3568 | |||||||
3569 | if (m) | ||||||
3570 | { | ||||||
3571 | *dst = pack_1x128_32 ( | ||||||
3572 | pix_multiply_1x128 ( | ||||||
3573 | xmm_src, expand_pixel_8_1x128 (m))); | ||||||
3574 | } | ||||||
3575 | else | ||||||
3576 | { | ||||||
3577 | *dst = 0; | ||||||
3578 | } | ||||||
3579 | |||||||
3580 | w--; | ||||||
3581 | dst++; | ||||||
3582 | } | ||||||
3583 | } | ||||||
3584 | |||||||
3585 | } | ||||||
3586 | |||||||
3587 | static void | ||||||
3588 | sse2_composite_over_n_8_0565 (pixman_implementation_t *imp, | ||||||
3589 | pixman_composite_info_t *info) | ||||||
3590 | { | ||||||
3591 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3592 | uint32_t src; | ||||||
3593 | uint16_t *dst_line, *dst, d; | ||||||
3594 | uint8_t *mask_line, *mask; | ||||||
3595 | int dst_stride, mask_stride; | ||||||
3596 | int32_t w; | ||||||
3597 | uint32_t m; | ||||||
3598 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | ||||||
3599 | |||||||
3600 | __m128i xmm_src, xmm_alpha; | ||||||
3601 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
3602 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | ||||||
3603 | |||||||
3604 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
3605 | |||||||
3606 | if (src == 0) | ||||||
3607 | return; | ||||||
3608 | |||||||
3609 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3610 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3611 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
3612 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
3613 | |||||||
3614 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
3615 | xmm_alpha = expand_alpha_1x128 (xmm_src); | ||||||
3616 | mmx_src = xmm_src; | ||||||
3617 | mmx_alpha = xmm_alpha; | ||||||
3618 | |||||||
3619 | while (height--) | ||||||
3620 | { | ||||||
3621 | dst = dst_line; | ||||||
3622 | dst_line += dst_stride; | ||||||
3623 | mask = mask_line; | ||||||
3624 | mask_line += mask_stride; | ||||||
3625 | w = width; | ||||||
3626 | |||||||
3627 | while (w && (uintptr_t)dst & 15) | ||||||
3628 | { | ||||||
3629 | m = *mask++; | ||||||
3630 | |||||||
3631 | if (m) | ||||||
3632 | { | ||||||
3633 | d = *dst; | ||||||
3634 | mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | ||||||
3635 | mmx_dest = expand565_16_1x128 (d); | ||||||
3636 | |||||||
3637 | *dst = pack_565_32_16 ( | ||||||
3638 | pack_1x128_32 ( | ||||||
3639 | in_over_1x128 ( | ||||||
3640 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | ||||||
3641 | } | ||||||
3642 | |||||||
3643 | w--; | ||||||
3644 | dst++; | ||||||
3645 | } | ||||||
3646 | |||||||
3647 | while (w >= 8) | ||||||
3648 | { | ||||||
3649 | xmm_dst = load_128_aligned ((__m128i*) dst); | ||||||
3650 | unpack_565_128_4x128 (xmm_dst, | ||||||
3651 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | ||||||
3652 | |||||||
3653 | m = *((uint32_t*)mask); | ||||||
3654 | mask += 4; | ||||||
3655 | |||||||
3656 | if (m) | ||||||
3657 | { | ||||||
3658 | xmm_mask = unpack_32_1x128 (m); | ||||||
3659 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | ||||||
3660 | |||||||
3661 | /* Unpacking */ | ||||||
3662 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
3663 | |||||||
3664 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | ||||||
3665 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
3666 | |||||||
3667 | in_over_2x128 (&xmm_src, &xmm_src, | ||||||
3668 | &xmm_alpha, &xmm_alpha, | ||||||
3669 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
3670 | &xmm_dst0, &xmm_dst1); | ||||||
3671 | } | ||||||
3672 | |||||||
3673 | m = *((uint32_t*)mask); | ||||||
3674 | mask += 4; | ||||||
3675 | |||||||
3676 | if (m) | ||||||
3677 | { | ||||||
3678 | xmm_mask = unpack_32_1x128 (m); | ||||||
3679 | xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ()); | ||||||
3680 | |||||||
3681 | /* Unpacking */ | ||||||
3682 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
3683 | |||||||
3684 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | ||||||
3685 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
3686 | in_over_2x128 (&xmm_src, &xmm_src, | ||||||
3687 | &xmm_alpha, &xmm_alpha, | ||||||
3688 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
3689 | &xmm_dst2, &xmm_dst3); | ||||||
3690 | } | ||||||
3691 | |||||||
3692 | save_128_aligned ( | ||||||
3693 | (__m128i*)dst, pack_565_4x128_128 ( | ||||||
3694 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | ||||||
3695 | |||||||
3696 | w -= 8; | ||||||
3697 | dst += 8; | ||||||
3698 | } | ||||||
3699 | |||||||
3700 | while (w) | ||||||
3701 | { | ||||||
3702 | m = *mask++; | ||||||
3703 | |||||||
3704 | if (m) | ||||||
3705 | { | ||||||
3706 | d = *dst; | ||||||
3707 | mmx_mask = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | ||||||
3708 | mmx_dest = expand565_16_1x128 (d); | ||||||
3709 | |||||||
3710 | *dst = pack_565_32_16 ( | ||||||
3711 | pack_1x128_32 ( | ||||||
3712 | in_over_1x128 ( | ||||||
3713 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | ||||||
3714 | } | ||||||
3715 | |||||||
3716 | w--; | ||||||
3717 | dst++; | ||||||
3718 | } | ||||||
3719 | } | ||||||
3720 | |||||||
3721 | } | ||||||
3722 | |||||||
3723 | static void | ||||||
3724 | sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp, | ||||||
3725 | pixman_composite_info_t *info) | ||||||
3726 | { | ||||||
3727 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3728 | uint16_t *dst_line, *dst, d; | ||||||
3729 | uint32_t *src_line, *src, s; | ||||||
3730 | int dst_stride, src_stride; | ||||||
3731 | int32_t w; | ||||||
3732 | uint32_t opaque, zero; | ||||||
3733 | |||||||
3734 | __m128i ms; | ||||||
3735 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
3736 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | ||||||
3737 | |||||||
3738 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3739 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3740 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
3741 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
3742 | |||||||
3743 | while (height--) | ||||||
3744 | { | ||||||
3745 | dst = dst_line; | ||||||
3746 | dst_line += dst_stride; | ||||||
3747 | src = src_line; | ||||||
3748 | src_line += src_stride; | ||||||
3749 | w = width; | ||||||
3750 | |||||||
3751 | while (w && (uintptr_t)dst & 15) | ||||||
3752 | { | ||||||
3753 | s = *src++; | ||||||
3754 | d = *dst; | ||||||
3755 | |||||||
3756 | ms = unpack_32_1x128 (s); | ||||||
3757 | |||||||
3758 | *dst++ = pack_565_32_16 ( | ||||||
3759 | pack_1x128_32 ( | ||||||
3760 | over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); | ||||||
3761 | w--; | ||||||
3762 | } | ||||||
3763 | |||||||
3764 | while (w >= 8) | ||||||
3765 | { | ||||||
3766 | /* First round */ | ||||||
3767 | xmm_src = load_128_unaligned ((__m128i*)src); | ||||||
3768 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
3769 | |||||||
3770 | opaque = is_opaque (xmm_src); | ||||||
3771 | zero = is_zero (xmm_src); | ||||||
3772 | |||||||
3773 | unpack_565_128_4x128 (xmm_dst, | ||||||
3774 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | ||||||
3775 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
3776 | |||||||
3777 | /* preload next round*/ | ||||||
3778 | xmm_src = load_128_unaligned ((__m128i*)(src + 4)); | ||||||
3779 | |||||||
3780 | if (opaque) | ||||||
3781 | { | ||||||
3782 | invert_colors_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3783 | &xmm_dst0, &xmm_dst1); | ||||||
3784 | } | ||||||
3785 | else if (!zero) | ||||||
3786 | { | ||||||
3787 | over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3788 | &xmm_dst0, &xmm_dst1); | ||||||
3789 | } | ||||||
3790 | |||||||
3791 | /* Second round */ | ||||||
3792 | opaque = is_opaque (xmm_src); | ||||||
3793 | zero = is_zero (xmm_src); | ||||||
3794 | |||||||
3795 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
3796 | |||||||
3797 | if (opaque) | ||||||
3798 | { | ||||||
3799 | invert_colors_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3800 | &xmm_dst2, &xmm_dst3); | ||||||
3801 | } | ||||||
3802 | else if (!zero) | ||||||
3803 | { | ||||||
3804 | over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3805 | &xmm_dst2, &xmm_dst3); | ||||||
3806 | } | ||||||
3807 | |||||||
3808 | save_128_aligned ( | ||||||
3809 | (__m128i*)dst, pack_565_4x128_128 ( | ||||||
3810 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | ||||||
3811 | |||||||
3812 | w -= 8; | ||||||
3813 | src += 8; | ||||||
3814 | dst += 8; | ||||||
3815 | } | ||||||
3816 | |||||||
3817 | while (w) | ||||||
3818 | { | ||||||
3819 | s = *src++; | ||||||
3820 | d = *dst; | ||||||
3821 | |||||||
3822 | ms = unpack_32_1x128 (s); | ||||||
3823 | |||||||
3824 | *dst++ = pack_565_32_16 ( | ||||||
3825 | pack_1x128_32 ( | ||||||
3826 | over_rev_non_pre_1x128 (ms, expand565_16_1x128 (d)))); | ||||||
3827 | w--; | ||||||
3828 | } | ||||||
3829 | } | ||||||
3830 | |||||||
3831 | } | ||||||
3832 | |||||||
3833 | static void | ||||||
3834 | sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp, | ||||||
3835 | pixman_composite_info_t *info) | ||||||
3836 | { | ||||||
3837 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3838 | uint32_t *dst_line, *dst, d; | ||||||
3839 | uint32_t *src_line, *src, s; | ||||||
3840 | int dst_stride, src_stride; | ||||||
3841 | int32_t w; | ||||||
3842 | uint32_t opaque, zero; | ||||||
3843 | |||||||
3844 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
3845 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
3846 | |||||||
3847 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3848 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3849 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
3850 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
3851 | |||||||
3852 | while (height--) | ||||||
3853 | { | ||||||
3854 | dst = dst_line; | ||||||
3855 | dst_line += dst_stride; | ||||||
3856 | src = src_line; | ||||||
3857 | src_line += src_stride; | ||||||
3858 | w = width; | ||||||
3859 | |||||||
3860 | while (w && (uintptr_t)dst & 15) | ||||||
3861 | { | ||||||
3862 | s = *src++; | ||||||
3863 | d = *dst; | ||||||
3864 | |||||||
3865 | *dst++ = pack_1x128_32 ( | ||||||
3866 | over_rev_non_pre_1x128 ( | ||||||
3867 | unpack_32_1x128 (s), unpack_32_1x128 (d))); | ||||||
3868 | |||||||
3869 | w--; | ||||||
3870 | } | ||||||
3871 | |||||||
3872 | while (w >= 4) | ||||||
3873 | { | ||||||
3874 | xmm_src_hi = load_128_unaligned ((__m128i*)src); | ||||||
3875 | |||||||
3876 | opaque = is_opaque (xmm_src_hi); | ||||||
3877 | zero = is_zero (xmm_src_hi); | ||||||
3878 | |||||||
3879 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
3880 | |||||||
3881 | if (opaque) | ||||||
3882 | { | ||||||
3883 | invert_colors_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3884 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
3885 | |||||||
3886 | save_128_aligned ( | ||||||
3887 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
3888 | } | ||||||
3889 | else if (!zero) | ||||||
3890 | { | ||||||
3891 | xmm_dst_hi = load_128_aligned ((__m128i*)dst); | ||||||
3892 | |||||||
3893 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
3894 | |||||||
3895 | over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
3896 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
3897 | |||||||
3898 | save_128_aligned ( | ||||||
3899 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
3900 | } | ||||||
3901 | |||||||
3902 | w -= 4; | ||||||
3903 | dst += 4; | ||||||
3904 | src += 4; | ||||||
3905 | } | ||||||
3906 | |||||||
3907 | while (w) | ||||||
3908 | { | ||||||
3909 | s = *src++; | ||||||
3910 | d = *dst; | ||||||
3911 | |||||||
3912 | *dst++ = pack_1x128_32 ( | ||||||
3913 | over_rev_non_pre_1x128 ( | ||||||
3914 | unpack_32_1x128 (s), unpack_32_1x128 (d))); | ||||||
3915 | |||||||
3916 | w--; | ||||||
3917 | } | ||||||
3918 | } | ||||||
3919 | |||||||
3920 | } | ||||||
3921 | |||||||
3922 | static void | ||||||
3923 | sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp, | ||||||
3924 | pixman_composite_info_t *info) | ||||||
3925 | { | ||||||
3926 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
3927 | uint32_t src; | ||||||
3928 | uint16_t *dst_line, *dst, d; | ||||||
3929 | uint32_t *mask_line, *mask, m; | ||||||
3930 | int dst_stride, mask_stride; | ||||||
3931 | int w; | ||||||
3932 | uint32_t pack_cmp; | ||||||
3933 | |||||||
3934 | __m128i xmm_src, xmm_alpha; | ||||||
3935 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
3936 | __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3; | ||||||
3937 | |||||||
3938 | __m128i mmx_src, mmx_alpha, mmx_mask, mmx_dest; | ||||||
3939 | |||||||
3940 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
3941 | |||||||
3942 | if (src == 0) | ||||||
3943 | return; | ||||||
3944 | |||||||
3945 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
3946 | dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t); (dst_line) = ((uint16_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
3947 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
3948 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
3949 | |||||||
3950 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
3951 | xmm_alpha = expand_alpha_1x128 (xmm_src); | ||||||
3952 | mmx_src = xmm_src; | ||||||
3953 | mmx_alpha = xmm_alpha; | ||||||
3954 | |||||||
3955 | while (height--) | ||||||
3956 | { | ||||||
3957 | w = width; | ||||||
3958 | mask = mask_line; | ||||||
3959 | dst = dst_line; | ||||||
3960 | mask_line += mask_stride; | ||||||
3961 | dst_line += dst_stride; | ||||||
3962 | |||||||
3963 | while (w && ((uintptr_t)dst & 15)) | ||||||
3964 | { | ||||||
3965 | m = *(uint32_t *) mask; | ||||||
3966 | |||||||
3967 | if (m) | ||||||
3968 | { | ||||||
3969 | d = *dst; | ||||||
3970 | mmx_mask = unpack_32_1x128 (m); | ||||||
3971 | mmx_dest = expand565_16_1x128 (d); | ||||||
3972 | |||||||
3973 | *dst = pack_565_32_16 ( | ||||||
3974 | pack_1x128_32 ( | ||||||
3975 | in_over_1x128 ( | ||||||
3976 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | ||||||
3977 | } | ||||||
3978 | |||||||
3979 | w--; | ||||||
3980 | dst++; | ||||||
3981 | mask++; | ||||||
3982 | } | ||||||
3983 | |||||||
3984 | while (w >= 8) | ||||||
3985 | { | ||||||
3986 | /* First round */ | ||||||
3987 | xmm_mask = load_128_unaligned ((__m128i*)mask); | ||||||
3988 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
3989 | |||||||
3990 | pack_cmp = _mm_movemask_epi8 ( | ||||||
3991 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | ||||||
3992 | |||||||
3993 | unpack_565_128_4x128 (xmm_dst, | ||||||
3994 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3); | ||||||
3995 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
3996 | |||||||
3997 | /* preload next round */ | ||||||
3998 | xmm_mask = load_128_unaligned ((__m128i*)(mask + 4)); | ||||||
3999 | |||||||
4000 | /* preload next round */ | ||||||
4001 | if (pack_cmp != 0xffff) | ||||||
4002 | { | ||||||
4003 | in_over_2x128 (&xmm_src, &xmm_src, | ||||||
4004 | &xmm_alpha, &xmm_alpha, | ||||||
4005 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
4006 | &xmm_dst0, &xmm_dst1); | ||||||
4007 | } | ||||||
4008 | |||||||
4009 | /* Second round */ | ||||||
4010 | pack_cmp = _mm_movemask_epi8 ( | ||||||
4011 | _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ())); | ||||||
4012 | |||||||
4013 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
4014 | |||||||
4015 | if (pack_cmp != 0xffff) | ||||||
4016 | { | ||||||
4017 | in_over_2x128 (&xmm_src, &xmm_src, | ||||||
4018 | &xmm_alpha, &xmm_alpha, | ||||||
4019 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
4020 | &xmm_dst2, &xmm_dst3); | ||||||
4021 | } | ||||||
4022 | |||||||
4023 | save_128_aligned ( | ||||||
4024 | (__m128i*)dst, pack_565_4x128_128 ( | ||||||
4025 | &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3)); | ||||||
4026 | |||||||
4027 | w -= 8; | ||||||
4028 | dst += 8; | ||||||
4029 | mask += 8; | ||||||
4030 | } | ||||||
4031 | |||||||
4032 | while (w) | ||||||
4033 | { | ||||||
4034 | m = *(uint32_t *) mask; | ||||||
4035 | |||||||
4036 | if (m) | ||||||
4037 | { | ||||||
4038 | d = *dst; | ||||||
4039 | mmx_mask = unpack_32_1x128 (m); | ||||||
4040 | mmx_dest = expand565_16_1x128 (d); | ||||||
4041 | |||||||
4042 | *dst = pack_565_32_16 ( | ||||||
4043 | pack_1x128_32 ( | ||||||
4044 | in_over_1x128 ( | ||||||
4045 | &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest))); | ||||||
4046 | } | ||||||
4047 | |||||||
4048 | w--; | ||||||
4049 | dst++; | ||||||
4050 | mask++; | ||||||
4051 | } | ||||||
4052 | } | ||||||
4053 | |||||||
4054 | } | ||||||
4055 | |||||||
4056 | static void | ||||||
4057 | sse2_composite_in_n_8_8 (pixman_implementation_t *imp, | ||||||
4058 | pixman_composite_info_t *info) | ||||||
4059 | { | ||||||
4060 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4061 | uint8_t *dst_line, *dst; | ||||||
4062 | uint8_t *mask_line, *mask; | ||||||
4063 | int dst_stride, mask_stride; | ||||||
4064 | uint32_t d, m; | ||||||
4065 | uint32_t src; | ||||||
4066 | int32_t w; | ||||||
4067 | |||||||
4068 | __m128i xmm_alpha; | ||||||
4069 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
4070 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
4071 | |||||||
4072 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4073 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4074 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
4075 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
4076 | |||||||
4077 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
4078 | |||||||
4079 | xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); | ||||||
4080 | |||||||
4081 | while (height--) | ||||||
4082 | { | ||||||
4083 | dst = dst_line; | ||||||
4084 | dst_line += dst_stride; | ||||||
4085 | mask = mask_line; | ||||||
4086 | mask_line += mask_stride; | ||||||
4087 | w = width; | ||||||
4088 | |||||||
4089 | while (w && ((uintptr_t)dst & 15)) | ||||||
4090 | { | ||||||
4091 | m = (uint32_t) *mask++; | ||||||
4092 | d = (uint32_t) *dst; | ||||||
4093 | |||||||
4094 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4095 | pix_multiply_1x128 ( | ||||||
4096 | pix_multiply_1x128 (xmm_alpha, | ||||||
4097 | unpack_32_1x128 (m)), | ||||||
4098 | unpack_32_1x128 (d))); | ||||||
4099 | w--; | ||||||
4100 | } | ||||||
4101 | |||||||
4102 | while (w >= 16) | ||||||
4103 | { | ||||||
4104 | xmm_mask = load_128_unaligned ((__m128i*)mask); | ||||||
4105 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
4106 | |||||||
4107 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
4108 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
4109 | |||||||
4110 | pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, | ||||||
4111 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
4112 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
4113 | |||||||
4114 | pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, | ||||||
4115 | &xmm_dst_lo, &xmm_dst_hi, | ||||||
4116 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
4117 | |||||||
4118 | save_128_aligned ( | ||||||
4119 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
4120 | |||||||
4121 | mask += 16; | ||||||
4122 | dst += 16; | ||||||
4123 | w -= 16; | ||||||
4124 | } | ||||||
4125 | |||||||
4126 | while (w) | ||||||
4127 | { | ||||||
4128 | m = (uint32_t) *mask++; | ||||||
4129 | d = (uint32_t) *dst; | ||||||
4130 | |||||||
4131 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4132 | pix_multiply_1x128 ( | ||||||
4133 | pix_multiply_1x128 ( | ||||||
4134 | xmm_alpha, unpack_32_1x128 (m)), | ||||||
4135 | unpack_32_1x128 (d))); | ||||||
4136 | w--; | ||||||
4137 | } | ||||||
4138 | } | ||||||
4139 | |||||||
4140 | } | ||||||
4141 | |||||||
4142 | static void | ||||||
4143 | sse2_composite_in_n_8 (pixman_implementation_t *imp, | ||||||
4144 | pixman_composite_info_t *info) | ||||||
4145 | { | ||||||
4146 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4147 | uint8_t *dst_line, *dst; | ||||||
4148 | int dst_stride; | ||||||
4149 | uint32_t d; | ||||||
4150 | uint32_t src; | ||||||
4151 | int32_t w; | ||||||
4152 | |||||||
4153 | __m128i xmm_alpha; | ||||||
4154 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
4155 | |||||||
4156 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4157 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4158 | |||||||
4159 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
4160 | |||||||
4161 | xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); | ||||||
4162 | |||||||
4163 | src = src >> 24; | ||||||
4164 | |||||||
4165 | if (src == 0xff) | ||||||
4166 | return; | ||||||
4167 | |||||||
4168 | if (src == 0x00) | ||||||
4169 | { | ||||||
4170 | pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, | ||||||
4171 | 8, dest_x, dest_y, width, height, src); | ||||||
4172 | |||||||
4173 | return; | ||||||
4174 | } | ||||||
4175 | |||||||
4176 | while (height--) | ||||||
4177 | { | ||||||
4178 | dst = dst_line; | ||||||
4179 | dst_line += dst_stride; | ||||||
4180 | w = width; | ||||||
4181 | |||||||
4182 | while (w && ((uintptr_t)dst & 15)) | ||||||
4183 | { | ||||||
4184 | d = (uint32_t) *dst; | ||||||
4185 | |||||||
4186 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4187 | pix_multiply_1x128 ( | ||||||
4188 | xmm_alpha, | ||||||
4189 | unpack_32_1x128 (d))); | ||||||
4190 | w--; | ||||||
4191 | } | ||||||
4192 | |||||||
4193 | while (w >= 16) | ||||||
4194 | { | ||||||
4195 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
4196 | |||||||
4197 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
4198 | |||||||
4199 | pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, | ||||||
4200 | &xmm_dst_lo, &xmm_dst_hi, | ||||||
4201 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
4202 | |||||||
4203 | save_128_aligned ( | ||||||
4204 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
4205 | |||||||
4206 | dst += 16; | ||||||
4207 | w -= 16; | ||||||
4208 | } | ||||||
4209 | |||||||
4210 | while (w) | ||||||
4211 | { | ||||||
4212 | d = (uint32_t) *dst; | ||||||
4213 | |||||||
4214 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4215 | pix_multiply_1x128 ( | ||||||
4216 | xmm_alpha, | ||||||
4217 | unpack_32_1x128 (d))); | ||||||
4218 | w--; | ||||||
4219 | } | ||||||
4220 | } | ||||||
4221 | |||||||
4222 | } | ||||||
4223 | |||||||
4224 | static void | ||||||
4225 | sse2_composite_in_8_8 (pixman_implementation_t *imp, | ||||||
4226 | pixman_composite_info_t *info) | ||||||
4227 | { | ||||||
4228 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4229 | uint8_t *dst_line, *dst; | ||||||
4230 | uint8_t *src_line, *src; | ||||||
4231 | int src_stride, dst_stride; | ||||||
4232 | int32_t w; | ||||||
4233 | uint32_t s, d; | ||||||
4234 | |||||||
4235 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
4236 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
4237 | |||||||
4238 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4239 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4240 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
4241 | src_image, src_x, src_y, uint8_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
4242 | |||||||
4243 | while (height--) | ||||||
4244 | { | ||||||
4245 | dst = dst_line; | ||||||
4246 | dst_line += dst_stride; | ||||||
4247 | src = src_line; | ||||||
4248 | src_line += src_stride; | ||||||
4249 | w = width; | ||||||
4250 | |||||||
4251 | while (w && ((uintptr_t)dst & 15)) | ||||||
4252 | { | ||||||
4253 | s = (uint32_t) *src++; | ||||||
4254 | d = (uint32_t) *dst; | ||||||
4255 | |||||||
4256 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4257 | pix_multiply_1x128 ( | ||||||
4258 | unpack_32_1x128 (s), unpack_32_1x128 (d))); | ||||||
4259 | w--; | ||||||
4260 | } | ||||||
4261 | |||||||
4262 | while (w >= 16) | ||||||
4263 | { | ||||||
4264 | xmm_src = load_128_unaligned ((__m128i*)src); | ||||||
4265 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
4266 | |||||||
4267 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
4268 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
4269 | |||||||
4270 | pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
4271 | &xmm_dst_lo, &xmm_dst_hi, | ||||||
4272 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
4273 | |||||||
4274 | save_128_aligned ( | ||||||
4275 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
4276 | |||||||
4277 | src += 16; | ||||||
4278 | dst += 16; | ||||||
4279 | w -= 16; | ||||||
4280 | } | ||||||
4281 | |||||||
4282 | while (w) | ||||||
4283 | { | ||||||
4284 | s = (uint32_t) *src++; | ||||||
4285 | d = (uint32_t) *dst; | ||||||
4286 | |||||||
4287 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4288 | pix_multiply_1x128 (unpack_32_1x128 (s), unpack_32_1x128 (d))); | ||||||
4289 | w--; | ||||||
4290 | } | ||||||
4291 | } | ||||||
4292 | |||||||
4293 | } | ||||||
4294 | |||||||
4295 | static void | ||||||
4296 | sse2_composite_add_n_8_8 (pixman_implementation_t *imp, | ||||||
4297 | pixman_composite_info_t *info) | ||||||
4298 | { | ||||||
4299 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4300 | uint8_t *dst_line, *dst; | ||||||
4301 | uint8_t *mask_line, *mask; | ||||||
4302 | int dst_stride, mask_stride; | ||||||
4303 | int32_t w; | ||||||
4304 | uint32_t src; | ||||||
4305 | uint32_t m, d; | ||||||
4306 | |||||||
4307 | __m128i xmm_alpha; | ||||||
4308 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
4309 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
4310 | |||||||
4311 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4312 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4313 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
4314 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
4315 | |||||||
4316 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
4317 | |||||||
4318 | xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); | ||||||
4319 | |||||||
4320 | while (height--) | ||||||
4321 | { | ||||||
4322 | dst = dst_line; | ||||||
4323 | dst_line += dst_stride; | ||||||
4324 | mask = mask_line; | ||||||
4325 | mask_line += mask_stride; | ||||||
4326 | w = width; | ||||||
4327 | |||||||
4328 | while (w && ((uintptr_t)dst & 15)) | ||||||
4329 | { | ||||||
4330 | m = (uint32_t) *mask++; | ||||||
4331 | d = (uint32_t) *dst; | ||||||
4332 | |||||||
4333 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4334 | _mm_adds_epu16 ( | ||||||
4335 | pix_multiply_1x128 ( | ||||||
4336 | xmm_alpha, unpack_32_1x128 (m)), | ||||||
4337 | unpack_32_1x128 (d))); | ||||||
4338 | w--; | ||||||
4339 | } | ||||||
4340 | |||||||
4341 | while (w >= 16) | ||||||
4342 | { | ||||||
4343 | xmm_mask = load_128_unaligned ((__m128i*)mask); | ||||||
4344 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
4345 | |||||||
4346 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
4347 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
4348 | |||||||
4349 | pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, | ||||||
4350 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
4351 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
4352 | |||||||
4353 | xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); | ||||||
4354 | xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); | ||||||
4355 | |||||||
4356 | save_128_aligned ( | ||||||
4357 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
4358 | |||||||
4359 | mask += 16; | ||||||
4360 | dst += 16; | ||||||
4361 | w -= 16; | ||||||
4362 | } | ||||||
4363 | |||||||
4364 | while (w) | ||||||
4365 | { | ||||||
4366 | m = (uint32_t) *mask++; | ||||||
4367 | d = (uint32_t) *dst; | ||||||
4368 | |||||||
4369 | *dst++ = (uint8_t) pack_1x128_32 ( | ||||||
4370 | _mm_adds_epu16 ( | ||||||
4371 | pix_multiply_1x128 ( | ||||||
4372 | xmm_alpha, unpack_32_1x128 (m)), | ||||||
4373 | unpack_32_1x128 (d))); | ||||||
4374 | |||||||
4375 | w--; | ||||||
4376 | } | ||||||
4377 | } | ||||||
4378 | |||||||
4379 | } | ||||||
4380 | |||||||
4381 | static void | ||||||
4382 | sse2_composite_add_n_8 (pixman_implementation_t *imp, | ||||||
4383 | pixman_composite_info_t *info) | ||||||
4384 | { | ||||||
4385 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4386 | uint8_t *dst_line, *dst; | ||||||
4387 | int dst_stride; | ||||||
4388 | int32_t w; | ||||||
4389 | uint32_t src; | ||||||
4390 | |||||||
4391 | __m128i xmm_src; | ||||||
4392 | |||||||
4393 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4394 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4395 | |||||||
4396 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
4397 | |||||||
4398 | src >>= 24; | ||||||
4399 | |||||||
4400 | if (src == 0x00) | ||||||
4401 | return; | ||||||
4402 | |||||||
4403 | if (src == 0xff) | ||||||
4404 | { | ||||||
4405 | pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, | ||||||
4406 | 8, dest_x, dest_y, width, height, 0xff); | ||||||
4407 | |||||||
4408 | return; | ||||||
4409 | } | ||||||
4410 | |||||||
4411 | src = (src << 24) | (src << 16) | (src << 8) | src; | ||||||
4412 | xmm_src = _mm_set_epi32 (src, src, src, src); | ||||||
4413 | |||||||
4414 | while (height--) | ||||||
4415 | { | ||||||
4416 | dst = dst_line; | ||||||
4417 | dst_line += dst_stride; | ||||||
4418 | w = width; | ||||||
4419 | |||||||
4420 | while (w && ((uintptr_t)dst & 15)) | ||||||
4421 | { | ||||||
4422 | *dst = (uint8_t)_mm_cvtsi128_si32 ( | ||||||
4423 | _mm_adds_epu8 ( | ||||||
4424 | xmm_src, | ||||||
4425 | _mm_cvtsi32_si128 (*dst))); | ||||||
4426 | |||||||
4427 | w--; | ||||||
4428 | dst++; | ||||||
4429 | } | ||||||
4430 | |||||||
4431 | while (w >= 16) | ||||||
4432 | { | ||||||
4433 | save_128_aligned ( | ||||||
4434 | (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); | ||||||
4435 | |||||||
4436 | dst += 16; | ||||||
4437 | w -= 16; | ||||||
4438 | } | ||||||
4439 | |||||||
4440 | while (w) | ||||||
4441 | { | ||||||
4442 | *dst = (uint8_t)_mm_cvtsi128_si32 ( | ||||||
4443 | _mm_adds_epu8 ( | ||||||
4444 | xmm_src, | ||||||
4445 | _mm_cvtsi32_si128 (*dst))); | ||||||
4446 | |||||||
4447 | w--; | ||||||
4448 | dst++; | ||||||
4449 | } | ||||||
4450 | } | ||||||
4451 | |||||||
4452 | } | ||||||
4453 | |||||||
4454 | static void | ||||||
4455 | sse2_composite_add_8_8 (pixman_implementation_t *imp, | ||||||
4456 | pixman_composite_info_t *info) | ||||||
4457 | { | ||||||
4458 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4459 | uint8_t *dst_line, *dst; | ||||||
4460 | uint8_t *src_line, *src; | ||||||
4461 | int dst_stride, src_stride; | ||||||
4462 | int32_t w; | ||||||
4463 | uint16_t t; | ||||||
4464 | |||||||
4465 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
4466 | src_image, src_x, src_y, uint8_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
4467 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4468 | dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (dst_line) = ((uint8_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4469 | |||||||
4470 | while (height--) | ||||||
4471 | { | ||||||
4472 | dst = dst_line; | ||||||
4473 | src = src_line; | ||||||
4474 | |||||||
4475 | dst_line += dst_stride; | ||||||
4476 | src_line += src_stride; | ||||||
4477 | w = width; | ||||||
4478 | |||||||
4479 | /* Small head */ | ||||||
4480 | while (w && (uintptr_t)dst & 3) | ||||||
4481 | { | ||||||
4482 | t = (*dst) + (*src++); | ||||||
4483 | *dst++ = t | (0 - (t >> 8)); | ||||||
4484 | w--; | ||||||
4485 | } | ||||||
4486 | |||||||
4487 | sse2_combine_add_u (imp, op, | ||||||
4488 | (uint32_t*)dst, (uint32_t*)src, NULL((void*)0), w >> 2); | ||||||
4489 | |||||||
4490 | /* Small tail */ | ||||||
4491 | dst += w & 0xfffc; | ||||||
4492 | src += w & 0xfffc; | ||||||
4493 | |||||||
4494 | w &= 3; | ||||||
4495 | |||||||
4496 | while (w) | ||||||
4497 | { | ||||||
4498 | t = (*dst) + (*src++); | ||||||
4499 | *dst++ = t | (0 - (t >> 8)); | ||||||
4500 | w--; | ||||||
4501 | } | ||||||
4502 | } | ||||||
4503 | |||||||
4504 | } | ||||||
4505 | |||||||
4506 | static void | ||||||
4507 | sse2_composite_add_8888_8888 (pixman_implementation_t *imp, | ||||||
4508 | pixman_composite_info_t *info) | ||||||
4509 | { | ||||||
4510 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4511 | uint32_t *dst_line, *dst; | ||||||
4512 | uint32_t *src_line, *src; | ||||||
4513 | int dst_stride, src_stride; | ||||||
4514 | |||||||
4515 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
4516 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
4517 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4518 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4519 | |||||||
4520 | while (height--) | ||||||
4521 | { | ||||||
4522 | dst = dst_line; | ||||||
4523 | dst_line += dst_stride; | ||||||
4524 | src = src_line; | ||||||
4525 | src_line += src_stride; | ||||||
4526 | |||||||
4527 | sse2_combine_add_u (imp, op, dst, src, NULL((void*)0), width); | ||||||
4528 | } | ||||||
4529 | } | ||||||
4530 | |||||||
4531 | static void | ||||||
4532 | sse2_composite_add_n_8888 (pixman_implementation_t *imp, | ||||||
4533 | pixman_composite_info_t *info) | ||||||
4534 | { | ||||||
4535 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4536 | uint32_t *dst_line, *dst, src; | ||||||
4537 | int dst_stride; | ||||||
4538 | |||||||
4539 | __m128i xmm_src; | ||||||
4540 | |||||||
4541 | PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4542 | |||||||
4543 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
4544 | if (src == 0) | ||||||
4545 | return; | ||||||
4546 | |||||||
4547 | if (src == ~0) | ||||||
4548 | { | ||||||
4549 | pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride, 32, | ||||||
4550 | dest_x, dest_y, width, height, ~0); | ||||||
4551 | |||||||
4552 | return; | ||||||
4553 | } | ||||||
4554 | |||||||
4555 | xmm_src = _mm_set_epi32 (src, src, src, src); | ||||||
4556 | while (height--) | ||||||
4557 | { | ||||||
4558 | int w = width; | ||||||
4559 | uint32_t d; | ||||||
4560 | |||||||
4561 | dst = dst_line; | ||||||
4562 | dst_line += dst_stride; | ||||||
4563 | |||||||
4564 | while (w && (uintptr_t)dst & 15) | ||||||
4565 | { | ||||||
4566 | d = *dst; | ||||||
4567 | *dst++ = | ||||||
4568 | _mm_cvtsi128_si32 ( _mm_adds_epu8 (xmm_src, _mm_cvtsi32_si128 (d))); | ||||||
4569 | w--; | ||||||
4570 | } | ||||||
4571 | |||||||
4572 | while (w >= 4) | ||||||
4573 | { | ||||||
4574 | save_128_aligned | ||||||
4575 | ((__m128i*)dst, | ||||||
4576 | _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst))); | ||||||
4577 | |||||||
4578 | dst += 4; | ||||||
4579 | w -= 4; | ||||||
4580 | } | ||||||
4581 | |||||||
4582 | while (w--) | ||||||
4583 | { | ||||||
4584 | d = *dst; | ||||||
4585 | *dst++ = | ||||||
4586 | _mm_cvtsi128_si32 (_mm_adds_epu8 (xmm_src, | ||||||
4587 | _mm_cvtsi32_si128 (d))); | ||||||
4588 | } | ||||||
4589 | } | ||||||
4590 | } | ||||||
4591 | |||||||
4592 | static void | ||||||
4593 | sse2_composite_add_n_8_8888 (pixman_implementation_t *imp, | ||||||
4594 | pixman_composite_info_t *info) | ||||||
4595 | { | ||||||
4596 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4597 | uint32_t *dst_line, *dst; | ||||||
4598 | uint8_t *mask_line, *mask; | ||||||
4599 | int dst_stride, mask_stride; | ||||||
4600 | int32_t w; | ||||||
4601 | uint32_t src; | ||||||
4602 | |||||||
4603 | __m128i xmm_src; | ||||||
4604 | |||||||
4605 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
4606 | if (src == 0) | ||||||
4607 | return; | ||||||
4608 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
4609 | |||||||
4610 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4611 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4612 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
4613 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
4614 | |||||||
4615 | while (height--) | ||||||
4616 | { | ||||||
4617 | dst = dst_line; | ||||||
4618 | dst_line += dst_stride; | ||||||
4619 | mask = mask_line; | ||||||
4620 | mask_line += mask_stride; | ||||||
4621 | w = width; | ||||||
4622 | |||||||
4623 | while (w && ((uintptr_t)dst & 15)) | ||||||
4624 | { | ||||||
4625 | uint8_t m = *mask++; | ||||||
4626 | if (m) | ||||||
4627 | { | ||||||
4628 | *dst = pack_1x128_32 | ||||||
4629 | (_mm_adds_epu16 | ||||||
4630 | (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), | ||||||
4631 | unpack_32_1x128 (*dst))); | ||||||
4632 | } | ||||||
4633 | dst++; | ||||||
4634 | w--; | ||||||
4635 | } | ||||||
4636 | |||||||
4637 | while (w >= 4) | ||||||
4638 | { | ||||||
4639 | uint32_t m = *(uint32_t*)mask; | ||||||
4640 | if (m) | ||||||
4641 | { | ||||||
4642 | __m128i xmm_mask_lo, xmm_mask_hi; | ||||||
4643 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
4644 | |||||||
4645 | __m128i xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
4646 | __m128i xmm_mask = | ||||||
4647 | _mm_unpacklo_epi8 (unpack_32_1x128(m), | ||||||
4648 | _mm_setzero_si128 ()); | ||||||
4649 | |||||||
4650 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
4651 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
4652 | |||||||
4653 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, | ||||||
4654 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
4655 | |||||||
4656 | pix_multiply_2x128 (&xmm_src, &xmm_src, | ||||||
4657 | &xmm_mask_lo, &xmm_mask_hi, | ||||||
4658 | &xmm_mask_lo, &xmm_mask_hi); | ||||||
4659 | |||||||
4660 | xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo); | ||||||
4661 | xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi); | ||||||
4662 | |||||||
4663 | save_128_aligned ( | ||||||
4664 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
4665 | } | ||||||
4666 | |||||||
4667 | w -= 4; | ||||||
4668 | dst += 4; | ||||||
4669 | mask += 4; | ||||||
4670 | } | ||||||
4671 | |||||||
4672 | while (w) | ||||||
4673 | { | ||||||
4674 | uint8_t m = *mask++; | ||||||
4675 | if (m) | ||||||
4676 | { | ||||||
4677 | *dst = pack_1x128_32 | ||||||
4678 | (_mm_adds_epu16 | ||||||
4679 | (pix_multiply_1x128 (xmm_src, expand_pixel_8_1x128 (m)), | ||||||
4680 | unpack_32_1x128 (*dst))); | ||||||
4681 | } | ||||||
4682 | dst++; | ||||||
4683 | w--; | ||||||
4684 | } | ||||||
4685 | } | ||||||
4686 | } | ||||||
4687 | |||||||
4688 | static pixman_bool_t | ||||||
4689 | sse2_blt (pixman_implementation_t *imp, | ||||||
4690 | uint32_t * src_bits, | ||||||
4691 | uint32_t * dst_bits, | ||||||
4692 | int src_stride, | ||||||
4693 | int dst_stride, | ||||||
4694 | int src_bpp, | ||||||
4695 | int dst_bpp, | ||||||
4696 | int src_x, | ||||||
4697 | int src_y, | ||||||
4698 | int dest_x, | ||||||
4699 | int dest_y, | ||||||
4700 | int width, | ||||||
4701 | int height) | ||||||
4702 | { | ||||||
4703 | uint8_t * src_bytes; | ||||||
4704 | uint8_t * dst_bytes; | ||||||
4705 | int byte_width; | ||||||
4706 | |||||||
4707 | if (src_bpp != dst_bpp) | ||||||
4708 | return FALSE0; | ||||||
4709 | |||||||
4710 | if (src_bpp == 16) | ||||||
4711 | { | ||||||
4712 | src_stride = src_stride * (int) sizeof (uint32_t) / 2; | ||||||
4713 | dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; | ||||||
4714 | src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); | ||||||
4715 | dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); | ||||||
4716 | byte_width = 2 * width; | ||||||
4717 | src_stride *= 2; | ||||||
4718 | dst_stride *= 2; | ||||||
4719 | } | ||||||
4720 | else if (src_bpp == 32) | ||||||
4721 | { | ||||||
4722 | src_stride = src_stride * (int) sizeof (uint32_t) / 4; | ||||||
4723 | dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; | ||||||
4724 | src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); | ||||||
4725 | dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dest_y) + (dest_x)); | ||||||
4726 | byte_width = 4 * width; | ||||||
4727 | src_stride *= 4; | ||||||
4728 | dst_stride *= 4; | ||||||
4729 | } | ||||||
4730 | else | ||||||
4731 | { | ||||||
4732 | return FALSE0; | ||||||
4733 | } | ||||||
4734 | |||||||
4735 | while (height--) | ||||||
4736 | { | ||||||
4737 | int w; | ||||||
4738 | uint8_t *s = src_bytes; | ||||||
4739 | uint8_t *d = dst_bytes; | ||||||
4740 | src_bytes += src_stride; | ||||||
4741 | dst_bytes += dst_stride; | ||||||
4742 | w = byte_width; | ||||||
4743 | |||||||
4744 | while (w >= 2 && ((uintptr_t)d & 3)) | ||||||
4745 | { | ||||||
4746 | *(uint16_t *)d = *(uint16_t *)s; | ||||||
4747 | w -= 2; | ||||||
4748 | s += 2; | ||||||
4749 | d += 2; | ||||||
4750 | } | ||||||
4751 | |||||||
4752 | while (w >= 4 && ((uintptr_t)d & 15)) | ||||||
4753 | { | ||||||
4754 | *(uint32_t *)d = *(uint32_t *)s; | ||||||
4755 | |||||||
4756 | w -= 4; | ||||||
4757 | s += 4; | ||||||
4758 | d += 4; | ||||||
4759 | } | ||||||
4760 | |||||||
4761 | while (w >= 64) | ||||||
4762 | { | ||||||
4763 | __m128i xmm0, xmm1, xmm2, xmm3; | ||||||
4764 | |||||||
4765 | xmm0 = load_128_unaligned ((__m128i*)(s)); | ||||||
4766 | xmm1 = load_128_unaligned ((__m128i*)(s + 16)); | ||||||
4767 | xmm2 = load_128_unaligned ((__m128i*)(s + 32)); | ||||||
4768 | xmm3 = load_128_unaligned ((__m128i*)(s + 48)); | ||||||
4769 | |||||||
4770 | save_128_aligned ((__m128i*)(d), xmm0); | ||||||
4771 | save_128_aligned ((__m128i*)(d + 16), xmm1); | ||||||
4772 | save_128_aligned ((__m128i*)(d + 32), xmm2); | ||||||
4773 | save_128_aligned ((__m128i*)(d + 48), xmm3); | ||||||
4774 | |||||||
4775 | s += 64; | ||||||
4776 | d += 64; | ||||||
4777 | w -= 64; | ||||||
4778 | } | ||||||
4779 | |||||||
4780 | while (w >= 16) | ||||||
4781 | { | ||||||
4782 | save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) ); | ||||||
4783 | |||||||
4784 | w -= 16; | ||||||
4785 | d += 16; | ||||||
4786 | s += 16; | ||||||
4787 | } | ||||||
4788 | |||||||
4789 | while (w >= 4) | ||||||
4790 | { | ||||||
4791 | *(uint32_t *)d = *(uint32_t *)s; | ||||||
4792 | |||||||
4793 | w -= 4; | ||||||
4794 | s += 4; | ||||||
4795 | d += 4; | ||||||
4796 | } | ||||||
4797 | |||||||
4798 | if (w >= 2) | ||||||
4799 | { | ||||||
4800 | *(uint16_t *)d = *(uint16_t *)s; | ||||||
4801 | w -= 2; | ||||||
4802 | s += 2; | ||||||
4803 | d += 2; | ||||||
4804 | } | ||||||
4805 | } | ||||||
4806 | |||||||
4807 | return TRUE1; | ||||||
4808 | } | ||||||
4809 | |||||||
4810 | static void | ||||||
4811 | sse2_composite_copy_area (pixman_implementation_t *imp, | ||||||
4812 | pixman_composite_info_t *info) | ||||||
4813 | { | ||||||
4814 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4815 | sse2_blt (imp, src_image->bits.bits, | ||||||
4816 | dest_image->bits.bits, | ||||||
4817 | src_image->bits.rowstride, | ||||||
4818 | dest_image->bits.rowstride, | ||||||
4819 | PIXMAN_FORMAT_BPP (src_image->bits.format)(((src_image->bits.format) >> 24) ), | ||||||
4820 | PIXMAN_FORMAT_BPP (dest_image->bits.format)(((dest_image->bits.format) >> 24) ), | ||||||
4821 | src_x, src_y, dest_x, dest_y, width, height); | ||||||
4822 | } | ||||||
4823 | |||||||
4824 | static void | ||||||
4825 | sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp, | ||||||
4826 | pixman_composite_info_t *info) | ||||||
4827 | { | ||||||
4828 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4829 | uint32_t *src, *src_line, s; | ||||||
4830 | uint32_t *dst, *dst_line, d; | ||||||
4831 | uint8_t *mask, *mask_line; | ||||||
4832 | uint32_t m; | ||||||
4833 | int src_stride, mask_stride, dst_stride; | ||||||
4834 | int32_t w; | ||||||
4835 | __m128i ms; | ||||||
4836 | |||||||
4837 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
4838 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
4839 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
4840 | |||||||
4841 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4842 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4843 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
4844 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
4845 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
4846 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
4847 | |||||||
4848 | while (height--) | ||||||
4849 | { | ||||||
4850 | src = src_line; | ||||||
4851 | src_line += src_stride; | ||||||
4852 | dst = dst_line; | ||||||
4853 | dst_line += dst_stride; | ||||||
4854 | mask = mask_line; | ||||||
4855 | mask_line += mask_stride; | ||||||
4856 | |||||||
4857 | w = width; | ||||||
4858 | |||||||
4859 | while (w && (uintptr_t)dst & 15) | ||||||
4860 | { | ||||||
4861 | s = 0xff000000 | *src++; | ||||||
4862 | m = (uint32_t) *mask++; | ||||||
4863 | d = *dst; | ||||||
4864 | ms = unpack_32_1x128 (s); | ||||||
4865 | |||||||
4866 | if (m != 0xff) | ||||||
4867 | { | ||||||
4868 | __m128i ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | ||||||
4869 | __m128i md = unpack_32_1x128 (d); | ||||||
4870 | |||||||
4871 | ms = in_over_1x128 (&ms, &mask_00ff, &ma, &md); | ||||||
4872 | } | ||||||
4873 | |||||||
4874 | *dst++ = pack_1x128_32 (ms); | ||||||
4875 | w--; | ||||||
4876 | } | ||||||
4877 | |||||||
4878 | while (w >= 4) | ||||||
4879 | { | ||||||
4880 | m = *(uint32_t*) mask; | ||||||
4881 | xmm_src = _mm_or_si128 ( | ||||||
4882 | load_128_unaligned ((__m128i*)src), mask_ff000000); | ||||||
4883 | |||||||
4884 | if (m == 0xffffffff) | ||||||
4885 | { | ||||||
4886 | save_128_aligned ((__m128i*)dst, xmm_src); | ||||||
4887 | } | ||||||
4888 | else | ||||||
4889 | { | ||||||
4890 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
4891 | |||||||
4892 | xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); | ||||||
4893 | |||||||
4894 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
4895 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
4896 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
4897 | |||||||
4898 | expand_alpha_rev_2x128 ( | ||||||
4899 | xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
4900 | |||||||
4901 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
4902 | &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, | ||||||
4903 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
4904 | |||||||
4905 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
4906 | } | ||||||
4907 | |||||||
4908 | src += 4; | ||||||
4909 | dst += 4; | ||||||
4910 | mask += 4; | ||||||
4911 | w -= 4; | ||||||
4912 | } | ||||||
4913 | |||||||
4914 | while (w) | ||||||
4915 | { | ||||||
4916 | m = (uint32_t) *mask++; | ||||||
4917 | |||||||
4918 | if (m) | ||||||
4919 | { | ||||||
4920 | s = 0xff000000 | *src; | ||||||
4921 | |||||||
4922 | if (m == 0xff) | ||||||
4923 | { | ||||||
4924 | *dst = s; | ||||||
4925 | } | ||||||
4926 | else | ||||||
4927 | { | ||||||
4928 | __m128i ma, md, ms; | ||||||
4929 | |||||||
4930 | d = *dst; | ||||||
4931 | |||||||
4932 | ma = expand_alpha_rev_1x128 (unpack_32_1x128 (m)); | ||||||
4933 | md = unpack_32_1x128 (d); | ||||||
4934 | ms = unpack_32_1x128 (s); | ||||||
4935 | |||||||
4936 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &mask_00ff, &ma, &md)); | ||||||
4937 | } | ||||||
4938 | |||||||
4939 | } | ||||||
4940 | |||||||
4941 | src++; | ||||||
4942 | dst++; | ||||||
4943 | w--; | ||||||
4944 | } | ||||||
4945 | } | ||||||
4946 | |||||||
4947 | } | ||||||
4948 | |||||||
4949 | static void | ||||||
4950 | sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp, | ||||||
4951 | pixman_composite_info_t *info) | ||||||
4952 | { | ||||||
4953 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
4954 | uint32_t *src, *src_line, s; | ||||||
4955 | uint32_t *dst, *dst_line, d; | ||||||
4956 | uint8_t *mask, *mask_line; | ||||||
4957 | uint32_t m; | ||||||
4958 | int src_stride, mask_stride, dst_stride; | ||||||
4959 | int32_t w; | ||||||
4960 | |||||||
4961 | __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; | ||||||
4962 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
4963 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
4964 | |||||||
4965 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
4966 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
4967 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
4968 | mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
4969 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
4970 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
4971 | |||||||
4972 | while (height--) | ||||||
4973 | { | ||||||
4974 | src = src_line; | ||||||
4975 | src_line += src_stride; | ||||||
4976 | dst = dst_line; | ||||||
4977 | dst_line += dst_stride; | ||||||
4978 | mask = mask_line; | ||||||
4979 | mask_line += mask_stride; | ||||||
4980 | |||||||
4981 | w = width; | ||||||
4982 | |||||||
4983 | while (w && (uintptr_t)dst & 15) | ||||||
4984 | { | ||||||
4985 | uint32_t sa; | ||||||
4986 | |||||||
4987 | s = *src++; | ||||||
4988 | m = (uint32_t) *mask++; | ||||||
4989 | d = *dst; | ||||||
4990 | |||||||
4991 | sa = s >> 24; | ||||||
4992 | |||||||
4993 | if (m) | ||||||
4994 | { | ||||||
4995 | if (sa == 0xff && m == 0xff) | ||||||
4996 | { | ||||||
4997 | *dst = s; | ||||||
4998 | } | ||||||
4999 | else | ||||||
5000 | { | ||||||
5001 | __m128i ms, md, ma, msa; | ||||||
5002 | |||||||
5003 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | ||||||
5004 | ms = unpack_32_1x128 (s); | ||||||
5005 | md = unpack_32_1x128 (d); | ||||||
5006 | |||||||
5007 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | ||||||
5008 | |||||||
5009 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | ||||||
5010 | } | ||||||
5011 | } | ||||||
5012 | |||||||
5013 | dst++; | ||||||
5014 | w--; | ||||||
5015 | } | ||||||
5016 | |||||||
5017 | while (w >= 4) | ||||||
5018 | { | ||||||
5019 | m = *(uint32_t *) mask; | ||||||
5020 | |||||||
5021 | if (m) | ||||||
5022 | { | ||||||
5023 | xmm_src = load_128_unaligned ((__m128i*)src); | ||||||
5024 | |||||||
5025 | if (m == 0xffffffff && is_opaque (xmm_src)) | ||||||
5026 | { | ||||||
5027 | save_128_aligned ((__m128i *)dst, xmm_src); | ||||||
5028 | } | ||||||
5029 | else | ||||||
5030 | { | ||||||
5031 | xmm_dst = load_128_aligned ((__m128i *)dst); | ||||||
5032 | |||||||
5033 | xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); | ||||||
5034 | |||||||
5035 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
5036 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
5037 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5038 | |||||||
5039 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); | ||||||
5040 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
5041 | |||||||
5042 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, | ||||||
5043 | &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5044 | |||||||
5045 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
5046 | } | ||||||
5047 | } | ||||||
5048 | |||||||
5049 | src += 4; | ||||||
5050 | dst += 4; | ||||||
5051 | mask += 4; | ||||||
5052 | w -= 4; | ||||||
5053 | } | ||||||
5054 | |||||||
5055 | while (w) | ||||||
5056 | { | ||||||
5057 | uint32_t sa; | ||||||
5058 | |||||||
5059 | s = *src++; | ||||||
5060 | m = (uint32_t) *mask++; | ||||||
5061 | d = *dst; | ||||||
5062 | |||||||
5063 | sa = s >> 24; | ||||||
5064 | |||||||
5065 | if (m) | ||||||
5066 | { | ||||||
5067 | if (sa == 0xff && m == 0xff) | ||||||
5068 | { | ||||||
5069 | *dst = s; | ||||||
5070 | } | ||||||
5071 | else | ||||||
5072 | { | ||||||
5073 | __m128i ms, md, ma, msa; | ||||||
5074 | |||||||
5075 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | ||||||
5076 | ms = unpack_32_1x128 (s); | ||||||
5077 | md = unpack_32_1x128 (d); | ||||||
5078 | |||||||
5079 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | ||||||
5080 | |||||||
5081 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | ||||||
5082 | } | ||||||
5083 | } | ||||||
5084 | |||||||
5085 | dst++; | ||||||
5086 | w--; | ||||||
5087 | } | ||||||
5088 | } | ||||||
5089 | |||||||
5090 | } | ||||||
5091 | |||||||
5092 | static void | ||||||
5093 | sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp, | ||||||
5094 | pixman_composite_info_t *info) | ||||||
5095 | { | ||||||
5096 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
5097 | uint32_t src; | ||||||
5098 | uint32_t *dst_line, *dst; | ||||||
5099 | __m128i xmm_src; | ||||||
5100 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
5101 | __m128i xmm_dsta_hi, xmm_dsta_lo; | ||||||
5102 | int dst_stride; | ||||||
5103 | int32_t w; | ||||||
5104 | |||||||
5105 | src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format); | ||||||
5106 | |||||||
5107 | if (src == 0) | ||||||
5108 | return; | ||||||
5109 | |||||||
5110 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
5111 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
5112 | |||||||
5113 | xmm_src = expand_pixel_32_1x128 (src); | ||||||
5114 | |||||||
5115 | while (height--) | ||||||
5116 | { | ||||||
5117 | dst = dst_line; | ||||||
5118 | |||||||
5119 | dst_line += dst_stride; | ||||||
5120 | w = width; | ||||||
5121 | |||||||
5122 | while (w && (uintptr_t)dst & 15) | ||||||
5123 | { | ||||||
5124 | __m128i vd; | ||||||
5125 | |||||||
5126 | vd = unpack_32_1x128 (*dst); | ||||||
5127 | |||||||
5128 | *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), | ||||||
5129 | xmm_src)); | ||||||
5130 | w--; | ||||||
5131 | dst++; | ||||||
5132 | } | ||||||
5133 | |||||||
5134 | while (w >= 4) | ||||||
5135 | { | ||||||
5136 | __m128i tmp_lo, tmp_hi; | ||||||
5137 | |||||||
5138 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
5139 | |||||||
5140 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5141 | expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi); | ||||||
5142 | |||||||
5143 | tmp_lo = xmm_src; | ||||||
5144 | tmp_hi = xmm_src; | ||||||
5145 | |||||||
5146 | over_2x128 (&xmm_dst_lo, &xmm_dst_hi, | ||||||
5147 | &xmm_dsta_lo, &xmm_dsta_hi, | ||||||
5148 | &tmp_lo, &tmp_hi); | ||||||
5149 | |||||||
5150 | save_128_aligned ( | ||||||
5151 | (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi)); | ||||||
5152 | |||||||
5153 | w -= 4; | ||||||
5154 | dst += 4; | ||||||
5155 | } | ||||||
5156 | |||||||
5157 | while (w) | ||||||
5158 | { | ||||||
5159 | __m128i vd; | ||||||
5160 | |||||||
5161 | vd = unpack_32_1x128 (*dst); | ||||||
5162 | |||||||
5163 | *dst = pack_1x128_32 (over_1x128 (vd, expand_alpha_1x128 (vd), | ||||||
5164 | xmm_src)); | ||||||
5165 | w--; | ||||||
5166 | dst++; | ||||||
5167 | } | ||||||
5168 | |||||||
5169 | } | ||||||
5170 | |||||||
5171 | } | ||||||
5172 | |||||||
5173 | static void | ||||||
5174 | sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp, | ||||||
5175 | pixman_composite_info_t *info) | ||||||
5176 | { | ||||||
5177 | PIXMAN_COMPOSITE_ARGS (info)__attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; | ||||||
5178 | uint32_t *src, *src_line, s; | ||||||
5179 | uint32_t *dst, *dst_line, d; | ||||||
5180 | uint32_t *mask, *mask_line; | ||||||
5181 | uint32_t m; | ||||||
5182 | int src_stride, mask_stride, dst_stride; | ||||||
5183 | int32_t w; | ||||||
5184 | |||||||
5185 | __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; | ||||||
5186 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
5187 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
5188 | |||||||
5189 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0) | ||||||
5190 | dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); | ||||||
5191 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0) | ||||||
5192 | mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image ->bits.bits; __stride__ = mask_image->bits.rowstride; ( mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride ) * (mask_y) + (1) * (mask_x); } while (0); | ||||||
5193 | PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0) | ||||||
5194 | src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y ) + (1) * (src_x); } while (0); | ||||||
5195 | |||||||
5196 | while (height--) | ||||||
5197 | { | ||||||
5198 | src = src_line; | ||||||
5199 | src_line += src_stride; | ||||||
5200 | dst = dst_line; | ||||||
5201 | dst_line += dst_stride; | ||||||
5202 | mask = mask_line; | ||||||
5203 | mask_line += mask_stride; | ||||||
5204 | |||||||
5205 | w = width; | ||||||
5206 | |||||||
5207 | while (w && (uintptr_t)dst & 15) | ||||||
5208 | { | ||||||
5209 | uint32_t sa; | ||||||
5210 | |||||||
5211 | s = *src++; | ||||||
5212 | m = (*mask++) >> 24; | ||||||
5213 | d = *dst; | ||||||
5214 | |||||||
5215 | sa = s >> 24; | ||||||
5216 | |||||||
5217 | if (m) | ||||||
5218 | { | ||||||
5219 | if (sa == 0xff && m == 0xff) | ||||||
5220 | { | ||||||
5221 | *dst = s; | ||||||
5222 | } | ||||||
5223 | else | ||||||
5224 | { | ||||||
5225 | __m128i ms, md, ma, msa; | ||||||
5226 | |||||||
5227 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | ||||||
5228 | ms = unpack_32_1x128 (s); | ||||||
5229 | md = unpack_32_1x128 (d); | ||||||
5230 | |||||||
5231 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | ||||||
5232 | |||||||
5233 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | ||||||
5234 | } | ||||||
5235 | } | ||||||
5236 | |||||||
5237 | dst++; | ||||||
5238 | w--; | ||||||
5239 | } | ||||||
5240 | |||||||
5241 | while (w >= 4) | ||||||
5242 | { | ||||||
5243 | xmm_mask = load_128_unaligned ((__m128i*)mask); | ||||||
5244 | |||||||
5245 | if (!is_transparent (xmm_mask)) | ||||||
5246 | { | ||||||
5247 | xmm_src = load_128_unaligned ((__m128i*)src); | ||||||
5248 | |||||||
5249 | if (is_opaque (xmm_mask) && is_opaque (xmm_src)) | ||||||
5250 | { | ||||||
5251 | save_128_aligned ((__m128i *)dst, xmm_src); | ||||||
5252 | } | ||||||
5253 | else | ||||||
5254 | { | ||||||
5255 | xmm_dst = load_128_aligned ((__m128i *)dst); | ||||||
5256 | |||||||
5257 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
5258 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
5259 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5260 | |||||||
5261 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); | ||||||
5262 | expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
5263 | |||||||
5264 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, | ||||||
5265 | &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5266 | |||||||
5267 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
5268 | } | ||||||
5269 | } | ||||||
5270 | |||||||
5271 | src += 4; | ||||||
5272 | dst += 4; | ||||||
5273 | mask += 4; | ||||||
5274 | w -= 4; | ||||||
5275 | } | ||||||
5276 | |||||||
5277 | while (w) | ||||||
5278 | { | ||||||
5279 | uint32_t sa; | ||||||
5280 | |||||||
5281 | s = *src++; | ||||||
5282 | m = (*mask++) >> 24; | ||||||
5283 | d = *dst; | ||||||
5284 | |||||||
5285 | sa = s >> 24; | ||||||
5286 | |||||||
5287 | if (m) | ||||||
5288 | { | ||||||
5289 | if (sa == 0xff && m == 0xff) | ||||||
5290 | { | ||||||
5291 | *dst = s; | ||||||
5292 | } | ||||||
5293 | else | ||||||
5294 | { | ||||||
5295 | __m128i ms, md, ma, msa; | ||||||
5296 | |||||||
5297 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | ||||||
5298 | ms = unpack_32_1x128 (s); | ||||||
5299 | md = unpack_32_1x128 (d); | ||||||
5300 | |||||||
5301 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | ||||||
5302 | |||||||
5303 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | ||||||
5304 | } | ||||||
5305 | } | ||||||
5306 | |||||||
5307 | dst++; | ||||||
5308 | w--; | ||||||
5309 | } | ||||||
5310 | } | ||||||
5311 | |||||||
5312 | } | ||||||
5313 | |||||||
5314 | /* A variant of 'sse2_combine_over_u' with minor tweaks */ | ||||||
5315 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
5316 | scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd, | ||||||
5317 | const uint32_t* ps, | ||||||
5318 | int32_t w, | ||||||
5319 | pixman_fixed_t vx, | ||||||
5320 | pixman_fixed_t unit_x, | ||||||
5321 | pixman_fixed_t src_width_fixed, | ||||||
5322 | pixman_bool_t fully_transparent_src) | ||||||
5323 | { | ||||||
5324 | uint32_t s, d; | ||||||
5325 | const uint32_t* pm = NULL((void*)0); | ||||||
5326 | |||||||
5327 | __m128i xmm_dst_lo, xmm_dst_hi; | ||||||
5328 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
5329 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
5330 | |||||||
5331 | if (fully_transparent_src) | ||||||
5332 | return; | ||||||
5333 | |||||||
5334 | /* Align dst on a 16-byte boundary */ | ||||||
5335 | while (w && ((uintptr_t)pd & 15)) | ||||||
5336 | { | ||||||
5337 | d = *pd; | ||||||
5338 | s = combine1 (ps + pixman_fixed_to_int (vx)((int) ((vx) >> 16)), pm); | ||||||
5339 | vx += unit_x; | ||||||
5340 | while (vx >= 0) | ||||||
5341 | vx -= src_width_fixed; | ||||||
5342 | |||||||
5343 | *pd++ = core_combine_over_u_pixel_sse2 (s, d); | ||||||
5344 | if (pm) | ||||||
5345 | pm++; | ||||||
5346 | w--; | ||||||
5347 | } | ||||||
5348 | |||||||
5349 | while (w >= 4) | ||||||
5350 | { | ||||||
5351 | __m128i tmp; | ||||||
5352 | uint32_t tmp1, tmp2, tmp3, tmp4; | ||||||
5353 | |||||||
5354 | tmp1 = *(ps + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5355 | vx += unit_x; | ||||||
5356 | while (vx >= 0) | ||||||
5357 | vx -= src_width_fixed; | ||||||
5358 | tmp2 = *(ps + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5359 | vx += unit_x; | ||||||
5360 | while (vx >= 0) | ||||||
5361 | vx -= src_width_fixed; | ||||||
5362 | tmp3 = *(ps + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5363 | vx += unit_x; | ||||||
5364 | while (vx >= 0) | ||||||
5365 | vx -= src_width_fixed; | ||||||
5366 | tmp4 = *(ps + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5367 | vx += unit_x; | ||||||
5368 | while (vx >= 0) | ||||||
5369 | vx -= src_width_fixed; | ||||||
5370 | |||||||
5371 | tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); | ||||||
5372 | |||||||
5373 | xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm); | ||||||
5374 | |||||||
5375 | if (is_opaque (xmm_src_hi)) | ||||||
5376 | { | ||||||
5377 | save_128_aligned ((__m128i*)pd, xmm_src_hi); | ||||||
5378 | } | ||||||
5379 | else if (!is_zero (xmm_src_hi)) | ||||||
5380 | { | ||||||
5381 | xmm_dst_hi = load_128_aligned ((__m128i*) pd); | ||||||
5382 | |||||||
5383 | unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi); | ||||||
5384 | unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5385 | |||||||
5386 | expand_alpha_2x128 ( | ||||||
5387 | xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
5388 | |||||||
5389 | over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
5390 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
5391 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
5392 | |||||||
5393 | /* rebuid the 4 pixel data and save*/ | ||||||
5394 | save_128_aligned ((__m128i*)pd, | ||||||
5395 | pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
5396 | } | ||||||
5397 | |||||||
5398 | w -= 4; | ||||||
5399 | pd += 4; | ||||||
5400 | if (pm) | ||||||
5401 | pm += 4; | ||||||
5402 | } | ||||||
5403 | |||||||
5404 | while (w) | ||||||
5405 | { | ||||||
5406 | d = *pd; | ||||||
5407 | s = combine1 (ps + pixman_fixed_to_int (vx)((int) ((vx) >> 16)), pm); | ||||||
5408 | vx += unit_x; | ||||||
5409 | while (vx >= 0) | ||||||
5410 | vx -= src_width_fixed; | ||||||
5411 | |||||||
5412 | *pd++ = core_combine_over_u_pixel_sse2 (s, d); | ||||||
5413 | if (pm) | ||||||
5414 | pm++; | ||||||
5415 | |||||||
5416 | w--; | ||||||
5417 | } | ||||||
5418 | } | ||||||
5419 | |||||||
5420 | FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * 
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = (( pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD , &y, src_image->bits.height); src = src_first_line + src_stride * y; if (left_pad > 0) { 
scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5421 | scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } 
while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = (( pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD , &y, src_image->bits.height); src = src_first_line + src_stride * y; if (left_pad > 0) { 
scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5422 | uint32_t, uint32_t, COVER)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (0) 
{ if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = (( pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD , &y, src_image->bits.height); src = src_first_line + src_stride * y; if (left_pad > 0) { 
scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_cover_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5423 | FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_none_OVER ( pixman_implementation_t *imp, pixman_composite_info_t *info) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); 
} while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image 
->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 
0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5424 | scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_none_OVER ( pixman_implementation_t *imp, pixman_composite_info_t *info) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } 
while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image 
->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 
0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5425 | uint32_t, uint32_t, NONE)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_none_OVER ( pixman_implementation_t *imp, pixman_composite_info_t *info) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (0) { 
if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = 
src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_none_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5426 | FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER ( pixman_implementation_t *imp, pixman_composite_info_t *info) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } 
while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image 
->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5427 | scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER ( pixman_implementation_t *imp, pixman_composite_info_t *info) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } 
while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image 
->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5428 | uint32_t, uint32_t, PAD)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER ( pixman_implementation_t *imp, pixman_composite_info_t *info) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (0) { if 
(0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line 
+ src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_pad_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5429 | FAST_NEAREST_MAINLOOP (sse2_8888_8888_normal_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * 
(dest_x); } while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, 
&y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 
0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5430 | scaled_nearest_scanline_sse2_8888_8888_OVER,static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); 
} while (0); if (0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, 
src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 
0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5431 | uint32_t, uint32_t, NORMAL)static __inline__ __attribute__ ((__always_inline__)) void scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper ( const uint8_t *mask, uint32_t *dst, const uint32_t *src, int32_t w, pixman_fixed_t vx, pixman_fixed_t unit_x, pixman_fixed_t max_vx , pixman_bool_t fully_transparent_src) { scaled_nearest_scanline_sse2_8888_8888_OVER (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); } static void fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if 
(0) { if (0) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (0 && !0) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image 
->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask + (0 ? 
0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_8888_OVER_sse2_8888_8888_normal_OVER_wrapper (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5432 | |||||||
5433 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
5434 | scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask, | ||||||
5435 | uint32_t * dst, | ||||||
5436 | const uint32_t * src, | ||||||
5437 | int32_t w, | ||||||
5438 | pixman_fixed_t vx, | ||||||
5439 | pixman_fixed_t unit_x, | ||||||
5440 | pixman_fixed_t src_width_fixed, | ||||||
5441 | pixman_bool_t zero_src) | ||||||
5442 | { | ||||||
5443 | __m128i xmm_mask; | ||||||
5444 | __m128i xmm_src, xmm_src_lo, xmm_src_hi; | ||||||
5445 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
5446 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
5447 | |||||||
5448 | if (zero_src || (*mask >> 24) == 0) | ||||||
5449 | return; | ||||||
5450 | |||||||
5451 | xmm_mask = create_mask_16_128 (*mask >> 24); | ||||||
5452 | |||||||
5453 | while (w && (uintptr_t)dst & 15) | ||||||
5454 | { | ||||||
5455 | uint32_t s = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5456 | vx += unit_x; | ||||||
5457 | while (vx >= 0) | ||||||
5458 | vx -= src_width_fixed; | ||||||
5459 | |||||||
5460 | if (s) | ||||||
5461 | { | ||||||
5462 | uint32_t d = *dst; | ||||||
5463 | |||||||
5464 | __m128i ms = unpack_32_1x128 (s); | ||||||
5465 | __m128i alpha = expand_alpha_1x128 (ms); | ||||||
5466 | __m128i dest = xmm_mask; | ||||||
5467 | __m128i alpha_dst = unpack_32_1x128 (d); | ||||||
5468 | |||||||
5469 | *dst = pack_1x128_32 ( | ||||||
5470 | in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | ||||||
5471 | } | ||||||
5472 | dst++; | ||||||
5473 | w--; | ||||||
5474 | } | ||||||
5475 | |||||||
5476 | while (w >= 4) | ||||||
5477 | { | ||||||
5478 | uint32_t tmp1, tmp2, tmp3, tmp4; | ||||||
5479 | |||||||
5480 | tmp1 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5481 | vx += unit_x; | ||||||
5482 | while (vx >= 0) | ||||||
5483 | vx -= src_width_fixed; | ||||||
5484 | tmp2 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5485 | vx += unit_x; | ||||||
5486 | while (vx >= 0) | ||||||
5487 | vx -= src_width_fixed; | ||||||
5488 | tmp3 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5489 | vx += unit_x; | ||||||
5490 | while (vx >= 0) | ||||||
5491 | vx -= src_width_fixed; | ||||||
5492 | tmp4 = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5493 | vx += unit_x; | ||||||
5494 | while (vx >= 0) | ||||||
5495 | vx -= src_width_fixed; | ||||||
5496 | |||||||
5497 | xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1); | ||||||
5498 | |||||||
5499 | if (!is_zero (xmm_src)) | ||||||
5500 | { | ||||||
5501 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
5502 | |||||||
5503 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
5504 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5505 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
5506 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
5507 | |||||||
5508 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
5509 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
5510 | &xmm_mask, &xmm_mask, | ||||||
5511 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
5512 | |||||||
5513 | save_128_aligned ( | ||||||
5514 | (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
5515 | } | ||||||
5516 | |||||||
5517 | dst += 4; | ||||||
5518 | w -= 4; | ||||||
5519 | } | ||||||
5520 | |||||||
5521 | while (w) | ||||||
5522 | { | ||||||
5523 | uint32_t s = *(src + pixman_fixed_to_int (vx)((int) ((vx) >> 16))); | ||||||
5524 | vx += unit_x; | ||||||
5525 | while (vx >= 0) | ||||||
5526 | vx -= src_width_fixed; | ||||||
5527 | |||||||
5528 | if (s) | ||||||
5529 | { | ||||||
5530 | uint32_t d = *dst; | ||||||
5531 | |||||||
5532 | __m128i ms = unpack_32_1x128 (s); | ||||||
5533 | __m128i alpha = expand_alpha_1x128 (ms); | ||||||
5534 | __m128i mask = xmm_mask; | ||||||
5535 | __m128i dest = unpack_32_1x128 (d); | ||||||
5536 | |||||||
5537 | *dst = pack_1x128_32 ( | ||||||
5538 | in_over_1x128 (&ms, &alpha, &mask, &dest)); | ||||||
5539 | } | ||||||
5540 | |||||||
5541 | dst++; | ||||||
5542 | w--; | ||||||
5543 | } | ||||||
5544 | |||||||
5545 | } | ||||||
5546 | |||||||
5547 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } 
while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = (( pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD , &y, src_image->bits.height); src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5548 | scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); 
} do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = (( pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD , &y, src_image->bits.height); src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5549 | uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while 
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_NORMAL) { max_vy = (( pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); } if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (-1 == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL , &vy, max_vy); if (-1 == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD , &y, src_image->bits.height); src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (-1 == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5550 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } 
while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5551 | scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } 
do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5552 | uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } 
do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5553 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } 
while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5554 | scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } 
do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5555 | uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); 
} do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5556 | FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); 
} while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5557 | scaled_nearest_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); 
} do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5558 | uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)static void fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y; pixman_fixed_t src_width_fixed = ((pixman_fixed_t ) ((src_image->bits.width) << 16)); pixman_fixed_t max_vy ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, right_pad; uint32_t *src; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; do { uint32_t *__bits__ ; int __stride__; __bits__ = dest_image->bits.bits; __stride__ = dest_image->bits.rowstride; (dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line ) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y) + (1) * (dest_x); } while (0); if (1) { if (1) solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); else do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while 
(0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= ((pixman_fixed_t) 1); v.vector[1] -= ((pixman_fixed_t) 1); vx = v.vector[0]; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { max_vy = ((pixman_fixed_t) ((src_image->bits.height) << 16)); repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed ); repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, &width, &left_pad, &right_pad); vx += left_pad * unit_x; } while (--height >= 0) { dst = dst_line ; dst_line += dst_stride; if (1 && !1) { mask = mask_line ; mask_line += mask_stride; } y = ((int) ((vy) >> 16)); vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { repeat (PIXMAN_REPEAT_PAD, &y, src_image ->bits.height); src = src_first_line + src_stride * y; if ( left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width - src_image->bits .width + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 0); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 
0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, src + src_image->bits.width, right_pad, -((pixman_fixed_t) 1) , 0, src_width_fixed, 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { static const uint32_t zero[1] = { 0 }; if (y < 0 || y >= src_image->bits.height) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad + width + right_pad, -((pixman_fixed_t ) 1), 0, src_width_fixed, 1); continue; } src = src_first_line + src_stride * y; if (left_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, zero + 1, left_pad, -((pixman_fixed_t) 1), 0, src_width_fixed , 1); } if (width > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad), dst + left_pad, src + src_image-> bits.width, width, vx - src_width_fixed, unit_x, src_width_fixed , 0); } if (right_pad > 0) { scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask + (1 ? 0 : left_pad + width), dst + left_pad + width, zero + 1, right_pad, -((pixman_fixed_t) 1), 0, src_width_fixed, 1 ); } } else { src = src_first_line + src_stride * y; scaled_nearest_scanline_sse2_8888_n_8888_OVER (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed , unit_x, src_width_fixed, 0); } } } | ||||||
5559 | |||||||
5560 | #if PSHUFD_IS_FAST0 | ||||||
5561 | |||||||
5562 | /***********************************************************************************/ | ||||||
5563 | |||||||
/*
 * PSHUFD_IS_FAST variant of BILINEAR_DECLARE_VARIABLES.
 * Declares the SSE2 values used by the bilinear helpers: xmm_wt / xmm_wb hold
 * wt / wb in all eight 16-bit lanes, xmm_addc holds the pattern (0, 1, ...),
 * xmm_ux1 / xmm_ux4 hold alternating +/- unit_x and +/- unit_x * 4, and xmm_x
 * is seeded with vx + unit_x * k and -(vx + 1) - unit_x * k for k = 0..3.
 * xmm_wh_state is declared without an initializer here; the helper in this
 * branch assigns it when its phase is <= 0.
 * wt, wb, vx and unit_x must be in scope where the macro is expanded.
 * NOTE(review): the text that follows the macro name on the #define row looks
 * like the report's inlined expansion of the active definition - confirm.
 */
5564 | # define BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb , wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 ( unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x ); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128 (); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1)) \ | ||||||
5565 | const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ | ||||||
5566 | const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ | ||||||
5567 | const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ | ||||||
5568 | const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ | ||||||
5569 | unit_x, -unit_x, unit_x, -unit_x); \ | ||||||
5570 | const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ | ||||||
5571 | unit_x * 4, -unit_x * 4, \ | ||||||
5572 | unit_x * 4, -unit_x * 4, \ | ||||||
5573 | unit_x * 4, -unit_x * 4); \ | ||||||
5574 | const __m128i xmm_zero = _mm_setzero_si128 (); \ | ||||||
5575 | __m128i xmm_x = _mm_set_epi16 (vx + unit_x * 3, -(vx + 1) - unit_x * 3, \ | ||||||
5576 | vx + unit_x * 2, -(vx + 1) - unit_x * 2, \ | ||||||
5577 | vx + unit_x * 1, -(vx + 1) - unit_x * 1, \ | ||||||
5578 | vx + unit_x * 0, -(vx + 1) - unit_x * 0); \ | ||||||
5579 | __m128i xmm_wh_state; | ||||||
5580 | |||||||
/*
 * PSHUFD_IS_FAST variant of BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER.
 * Loads the 2x2 block at src_top[vx >> 16] / src_bottom[vx >> 16], advances
 * vx by unit_x, and blends the two rows with xmm_wt / xmm_wb.
 * When phase_ <= 0 the horizontal weights are recomputed from xmm_x into
 * xmm_wh_state, xmm_x is advanced by xmm_ux1 (phase_ < 0) or xmm_ux4
 * (phase_ == 0), and the local phase is reset to 0. For phase_ > 0 the
 * previously stored xmm_wh_state is reused, so an earlier phase_ <= 0
 * invocation must have assigned it.
 * Lane `phase` of xmm_wh_state is then broadcast with _mm_shuffle_epi32 and
 * used by _mm_madd_epi16 for the horizontal blend; the result is shifted
 * right by twice the interpolation bit count and stored in pix.
 * NOTE(review): "BILINEAR_INTERPOLATION_BITS7" appears to be the macro name
 * fused with its expansion (7) by the report - confirm.
 */
5581 | #define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase_)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0) \ | ||||||
5582 | do { \ | ||||||
5583 | int phase = phase_; \ | ||||||
5584 | __m128i xmm_wh, xmm_a, xmm_b; \ | ||||||
5585 | /* fetch 2x2 pixel block into sse2 registers */ \ | ||||||
5586 | __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ | ||||||
5587 | __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ | ||||||
5588 | vx += unit_x; \ | ||||||
5589 | /* vertical interpolation */ \ | ||||||
5590 | xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ | ||||||
5591 | xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ | ||||||
5592 | xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ | ||||||
5593 | /* calculate horizontal weights */ \ | ||||||
5594 | if (phase <= 0) \ | ||||||
5595 | { \ | ||||||
5596 | xmm_wh_state = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ | ||||||
5597 | 16 - BILINEAR_INTERPOLATION_BITS7)); \ | ||||||
5598 | xmm_x = _mm_add_epi16 (xmm_x, (phase < 0) ? xmm_ux1 : xmm_ux4); \ | ||||||
5599 | phase = 0; \ | ||||||
5600 | } \ | ||||||
5601 | xmm_wh = _mm_shuffle_epi32 (xmm_wh_state, _MM_SHUFFLE (phase, phase, \__extension__ ({ (__m128i)__builtin_shufflevector((__v4si)(__m128i )(xmm_wh_state), (__v4si)_mm_setzero_si128(), ((((phase) << 6) | ((phase) << 4) | ((phase) << 2) | (phase))) & 0x3, (((((phase) << 6) | ((phase) << 4) | ( (phase) << 2) | (phase))) & 0xc) >> 2, (((((phase ) << 6) | ((phase) << 4) | ((phase) << 2) | (phase))) & 0x30) >> 4, (((((phase) << 6) | ( (phase) << 4) | ((phase) << 2) | (phase))) & 0xc0 ) >> 6); }) | ||||||
5602 | phase, phase))__extension__ ({ (__m128i)__builtin_shufflevector((__v4si)(__m128i )(xmm_wh_state), (__v4si)_mm_setzero_si128(), ((((phase) << 6) | ((phase) << 4) | ((phase) << 2) | (phase))) & 0x3, (((((phase) << 6) | ((phase) << 4) | ( (phase) << 2) | (phase))) & 0xc) >> 2, (((((phase ) << 6) | ((phase) << 4) | ((phase) << 2) | (phase))) & 0x30) >> 4, (((((phase) << 6) | ( (phase) << 4) | ((phase) << 2) | (phase))) & 0xc0 ) >> 6); }); \ | ||||||
5603 | /* horizontal interpolation */ \ | ||||||
5604 | xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (_mm_shuffle_epi32 ( \__extension__ ({ (__m128i)__builtin_shufflevector((__v4si)(__m128i )(xmm_a), (__v4si)_mm_setzero_si128(), ((((1) << 6) | ( (0) << 4) | ((3) << 2) | (2))) & 0x3, (((((1) << 6) | ((0) << 4) | ((3) << 2) | (2))) & 0xc) >> 2, (((((1) << 6) | ((0) << 4) | (( 3) << 2) | (2))) & 0x30) >> 4, (((((1) << 6) | ((0) << 4) | ((3) << 2) | (2))) & 0xc0) >> 6); }) | ||||||
5605 | xmm_a, _MM_SHUFFLE (1, 0, 3, 2))__extension__ ({ (__m128i)__builtin_shufflevector((__v4si)(__m128i )(xmm_a), (__v4si)_mm_setzero_si128(), ((((1) << 6) | ( (0) << 4) | ((3) << 2) | (2))) & 0x3, (((((1) << 6) | ((0) << 4) | ((3) << 2) | (2))) & 0xc) >> 2, (((((1) << 6) | ((0) << 4) | (( 3) << 2) | (2))) & 0x30) >> 4, (((((1) << 6) | ((0) << 4) | ((3) << 2) | (2))) & 0xc0) >> 6); }), xmm_a), xmm_wh); \ | ||||||
5606 | /* shift the result */ \ | ||||||
5607 | pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS7 * 2); \ | ||||||
5608 | } while (0)
5609 | |||||||
5610 | #else /************************************************************************/ | ||||||
5611 | |||||||
/*
 * #else (PSHUFD not fast) variant of BILINEAR_DECLARE_VARIABLES.
 * Declares the same constants as the other branch (xmm_wt, xmm_wb, xmm_addc,
 * xmm_ux1, xmm_ux4, xmm_zero), but seeds xmm_x with the same vx / -(vx + 1)
 * pair in all four positions and declares no xmm_wh_state.
 * The last declaration has no trailing semicolon, so the expansion site
 * supplies it. wt, wb, vx and unit_x must be in scope at the expansion site.
 */
5612 | # define BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb , wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 ( unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x ); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128 (); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1)) \ | ||||||
5613 | const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); \ | ||||||
5614 | const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb); \ | ||||||
5615 | const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); \ | ||||||
5616 | const __m128i xmm_ux1 = _mm_set_epi16 (unit_x, -unit_x, unit_x, -unit_x, \ | ||||||
5617 | unit_x, -unit_x, unit_x, -unit_x); \ | ||||||
5618 | const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, \ | ||||||
5619 | unit_x * 4, -unit_x * 4, \ | ||||||
5620 | unit_x * 4, -unit_x * 4, \ | ||||||
5621 | unit_x * 4, -unit_x * 4); \ | ||||||
5622 | const __m128i xmm_zero = _mm_setzero_si128 (); \ | ||||||
5623 | __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), \ | ||||||
5624 | vx, -(vx + 1), vx, -(vx + 1)) | ||||||
5625 | |||||||
/*
 * #else (PSHUFD not fast) variant of BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER.
 * Loads the 2x2 block at src_top[vx >> 16] / src_bottom[vx >> 16], advances
 * vx by unit_x, blends the rows with xmm_wt / xmm_wb, recomputes the
 * horizontal weights from xmm_x on every invocation, and advances xmm_x by
 * xmm_ux1. The horizontal blend uses _mm_unpacklo_epi64 / _mm_unpackhi_epi16
 * followed by _mm_madd_epi16; the shifted result is stored in pix.
 * The phase argument is not referenced in this variant, and xmm_ux4 is only
 * touched through a (void) cast to silence the unused-variable warning.
 * NOTE(review): "BILINEAR_INTERPOLATION_BITS7" appears to be the macro name
 * fused with its expansion (7) by the report - confirm.
 */
5626 | #define BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER(pix, phase)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0) \ | ||||||
5627 | do { \ | ||||||
5628 | __m128i xmm_wh, xmm_a, xmm_b; \ | ||||||
5629 | /* fetch 2x2 pixel block into sse2 registers */ \ | ||||||
5630 | __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); \ | ||||||
5631 | __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); \ | ||||||
5632 | (void)xmm_ux4; /* suppress warning: unused variable 'xmm_ux4' */ \ | ||||||
5633 | vx += unit_x; \ | ||||||
5634 | /* vertical interpolation */ \ | ||||||
5635 | xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); \ | ||||||
5636 | xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); \ | ||||||
5637 | xmm_a = _mm_add_epi16 (xmm_a, xmm_b); \ | ||||||
5638 | /* calculate horizontal weights */ \ | ||||||
5639 | xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, \ | ||||||
5640 | 16 - BILINEAR_INTERPOLATION_BITS7)); \ | ||||||
5641 | xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ | ||||||
5642 | /* horizontal interpolation */ \ | ||||||
5643 | xmm_b = _mm_unpacklo_epi64 (/* any value is fine here */ xmm_b, xmm_a); \ | ||||||
5644 | xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); \ | ||||||
5645 | /* shift the result */ \ | ||||||
5646 | pix = _mm_srli_epi32 (xmm_a, BILINEAR_INTERPOLATION_BITS7 * 2); \ | ||||||
5647 | } while (0)
5648 | |||||||
5649 | /***********************************************************************************/ | ||||||
5650 | |||||||
5651 | #endif | ||||||
5652 | |||||||
/*
 * Interpolates a single pixel into the scalar pix.
 * Invokes BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER with phase -1, narrows the
 * 32-bit lanes with _mm_packs_epi32 and then _mm_packus_epi16, and extracts
 * the low 32 bits with _mm_cvtsi128_si32.
 * Relies on the variables declared by BILINEAR_DECLARE_VARIABLES and on
 * src_top / src_bottom / vx / unit_x being in scope at the expansion site.
 */
5653 | #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix = _mm_cvtsi128_si32 (xmm_pix); } while(0); \ | ||||||
5654 | do { \ | ||||||
5655 | __m128i xmm_pix; \ | ||||||
5656 | BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix, -1)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2 ); } while (0); \ | ||||||
5657 | xmm_pix = _mm_packs_epi32 (xmm_pix, xmm_pix); \ | ||||||
5658 | xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); \ | ||||||
5659 | pix = _mm_cvtsi128_si32 (xmm_pix); \ | ||||||
5660 | } while(0)
5661 | |||||||
/*
 * Interpolates four pixels into the __m128i pix.
 * Invokes BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER four times with phases
 * 0, 1, 2, 3 (in that order), merges the results pairwise with
 * _mm_packs_epi32, and narrows them with _mm_packus_epi16.
 * Relies on the variables declared by BILINEAR_DECLARE_VARIABLES and on
 * src_top / src_bottom / vx / unit_x being in scope at the expansion site.
 */
5662 | #define BILINEAR_INTERPOLATE_FOUR_PIXELS(pix); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); 
xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1 , xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0); \ | ||||||
5663 | do { \ | ||||||
5664 | __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; \ | ||||||
5665 | BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix1, 0)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); \ | ||||||
5666 | BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix2, 1)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); \ | ||||||
5667 | BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix3, 2)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); \ | ||||||
5668 | BILINEAR_INTERPOLATE_ONE_PIXEL_HELPER (xmm_pix4, 3)do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); \ | ||||||
5669 | xmm_pix1 = _mm_packs_epi32 (xmm_pix1, xmm_pix2); \ | ||||||
5670 | xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); \ | ||||||
5671 | pix = _mm_packus_epi16 (xmm_pix1, xmm_pix3); \ | ||||||
5672 | } while(0)
5673 | |||||||
/*
 * Advances the interpolation position by one pixel without producing output:
 * vx moves by unit_x and the packed position xmm_x moves by xmm_ux1.
 * Neither src_top nor src_bottom is read.
 */
5674 | #define BILINEAR_SKIP_ONE_PIXEL()do { vx += unit_x; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); } while (0) \ | ||||||
5675 | do { \ | ||||||
5676 | vx += unit_x; \ | ||||||
5677 | xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); \ | ||||||
5678 | } while(0)
5679 | |||||||
/*
 * Advances the interpolation position by four pixels without producing
 * output: vx moves by unit_x * 4 and the packed position xmm_x moves by
 * xmm_ux4. Neither src_top nor src_bottom is read.
 */
5680 | #define BILINEAR_SKIP_FOUR_PIXELS()do { vx += unit_x * 4; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4) ; } while(0) \ | ||||||
5681 | do { \ | ||||||
5682 | vx += unit_x * 4; \ | ||||||
5683 | xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4); \ | ||||||
5684 | } while(0)
5685 | |||||||
5686 | /***********************************************************************************/ | ||||||
5687 | |||||||
/*
 * Writes w bilinear-interpolated 32-bit pixels to dst.
 *
 * dst         - output scanline; written one pixel at a time until it is
 *               16-byte aligned, then four pixels per aligned store.
 * mask        - not referenced in the body.
 * src_top,
 * src_bottom  - the two source rows that are blended; indexed by vx >> 16.
 * w           - number of pixels to produce.
 * wt, wb      - weights applied to the src_top and src_bottom rows.
 * vx_         - starting x position (copied into an intptr_t local).
 * unit_x_     - x increment applied per output pixel.
 * max_vx,
 * zero_src    - not referenced in the body.
 */
5688 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
5689 | scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t * dst, | ||||||
5690 | const uint32_t * mask, | ||||||
5691 | const uint32_t * src_top, | ||||||
5692 | const uint32_t * src_bottom, | ||||||
5693 | int32_t w, | ||||||
5694 | int wt, | ||||||
5695 | int wb, | ||||||
5696 | pixman_fixed_t vx_, | ||||||
5697 | pixman_fixed_t unit_x_, | ||||||
5698 | pixman_fixed_t max_vx, | ||||||
5699 | pixman_bool_t zero_src) | ||||||
5700 | { | ||||||
5701 | intptr_t vx = vx_; | ||||||
5702 | intptr_t unit_x = unit_x_; | ||||||
5703 | BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb , wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 ( unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x ); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128 (); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1)); | ||||||
5704 | uint32_t pix1, pix2; | ||||||
5705 | |||||||
/* Head: emit single pixels until dst is 16-byte aligned or w reaches 0. */
5706 | while (w && ((uintptr_t)dst & 15)) | ||||||
5707 | { | ||||||
5708 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5709 | *dst++ = pix1; | ||||||
5710 | w--; | ||||||
5711 | } | ||||||
5712 | |||||||
/* Body: four pixels per iteration, written with an aligned 128-bit store. */
5713 | while ((w -= 4) >= 0) { | ||||||
5714 | __m128i xmm_src; | ||||||
5715 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); 
xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1 , xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0); | ||||||
5716 | _mm_store_si128 ((__m128i *)dst, xmm_src); | ||||||
5717 | dst += 4; | ||||||
5718 | } | ||||||
5719 | |||||||
/*
 * Tail: the loop above leaves w in [-4, -1]. Subtracting 4 does not change the
 * low two bits, so (w & 2) and (w & 1) still select the 0-3 leftover pixels.
 */
5720 | if (w & 2) | ||||||
5721 | { | ||||||
5722 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5723 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix2 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5724 | *dst++ = pix1; | ||||||
5725 | *dst++ = pix2; | ||||||
5726 | } | ||||||
5727 | |||||||
5728 | if (w & 1) | ||||||
5729 | { | ||||||
5730 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5731 | *dst = pix1; | ||||||
5732 | } | ||||||
5733 | |||||||
5734 | } | ||||||
5735 | |||||||
5736 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) 
= ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) 
{ y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += 
left_tz; if ((0) & (1 << 2 )) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = 
width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5737 | scaled_bilinear_scanline_sse2_8888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 
1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += left_tz; if 
((0) & (1 << 2 )) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5738 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 
<< 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2 
)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5739 | COVER, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 
<< 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2 
)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5740 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); 
weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5741 | scaled_bilinear_scanline_sse2_8888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5742 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5743 | PAD, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * 
(mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight 
( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5744 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 
16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top 
= src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5745 | scaled_bilinear_scanline_sse2_8888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 
= pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top 
= src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5746 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top 
= src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5747 | NONE, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) 
* (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top 
= src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5748 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } 
y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, 
buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == 
src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5749 | scaled_bilinear_scanline_sse2_8888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( 
vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, 
left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) 
{ num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5750 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) 
+ (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5751 | NORMAL, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5752 | |||||||
/*
 * Bilinear scanline worker (SSE2). Judging by its name it handles an x888
 * source, an 8888 destination and the SRC operator -- the body is consistent
 * with that: every output pixel is OR'ed with 0xFF000000 before being stored.
 *
 * For each of the w output pixels it loads 64 bits (two adjacent 32-bit
 * pixels) from src_top and from src_bottom at index (vx >> 16), multiplies
 * the two rows by wt and wb and adds them, then combines the left/right
 * halves using weights derived from xmm_x (initialised from vx) and shifts
 * the result right by 7 * 2.  vx advances by unit_x after every pixel.
 *
 * dst        - output pointer; written with w pixels.
 * src_top    - upper source row.
 * src_bottom - lower source row.
 * w          - number of pixels to produce.
 * wt, wb     - weights applied to the upper and lower row.
 * vx_        - starting horizontal position (index is vx >> 16).
 * unit_x_    - horizontal step added to vx per output pixel.
 *
 * mask, max_vx and zero_src are not referenced anywhere in the body;
 * presumably they exist so the signature matches what the main-loop
 * template passes to every scanline function.
 */
5753 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
5754 | scaled_bilinear_scanline_sse2_x888_8888_SRC (uint32_t * dst, | ||||||
5755 | const uint32_t * mask, | ||||||
5756 | const uint32_t * src_top, | ||||||
5757 | const uint32_t * src_bottom, | ||||||
5758 | int32_t w, | ||||||
5759 | int wt, | ||||||
5760 | int wb, | ||||||
5761 | pixman_fixed_t vx_, | ||||||
5762 | pixman_fixed_t unit_x_, | ||||||
5763 | pixman_fixed_t max_vx, | ||||||
5764 | pixman_bool_t zero_src) | ||||||
5765 | { | ||||||
5766 | intptr_t vx = vx_; | ||||||
5767 | intptr_t unit_x = unit_x_; | ||||||
5768 | BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb , wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 ( unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x ); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128 (); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1)); | ||||||
5769 | uint32_t pix1, pix2; | ||||||
5770 | |||||||
/* Head: emit single pixels until dst is 16-byte aligned or w is exhausted. */
5771 | while (w && ((uintptr_t)dst & 15)) | ||||||
5772 | { | ||||||
5773 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5774 | *dst++ = pix1 | 0xFF000000; | ||||||
5775 | w--; | ||||||
5776 | } | ||||||
5777 | |||||||
/*
 * Body: four pixels per iteration, written with one 128-bit store.
 * The loop leaves w negative (remaining - 4); on two's-complement targets
 * its low two bits still equal the 0..3 leftover pixels tested below.
 */
5778 | while ((w -= 4) >= 0) { | ||||||
5779 | __m128i xmm_src; | ||||||
5780 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); 
xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1 , xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0); | ||||||
5781 | _mm_store_si128 ((__m128i *)dst, _mm_or_si128 (xmm_src, mask_ff000000)); | ||||||
5782 | dst += 4; | ||||||
5783 | } | ||||||
5784 | |||||||
/* Tail: two leftover pixels, if bit 1 of w is set. */
5785 | if (w & 2) | ||||||
5786 | { | ||||||
5787 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5788 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix2); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix2 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5789 | *dst++ = pix1 | 0xFF000000; | ||||||
5790 | *dst++ = pix2 | 0xFF000000; | ||||||
5791 | } | ||||||
5792 | |||||||
/* Tail: one final pixel, if bit 0 of w is set. */
5793 | if (w & 1) | ||||||
5794 | { | ||||||
5795 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5796 | *dst = pix1 | 0xFF000000; | ||||||
5797 | } | ||||||
5798 | } | ||||||
5799 | |||||||
5800 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_cover_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) 
= ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) 
{ y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += 
left_tz; if ((0) & (1 << 2 )) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = 
width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5801 | scaled_bilinear_scanline_sse2_x888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 
1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += left_tz; if 
((0) & (1 << 2 )) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5802 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 
<< 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2 
)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5803 | COVER, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 
<< 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1, * src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad ; if ((0) & (1 << 2)) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2[0] = 0; buf2[ 1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & (( ((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1)) ), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2 
)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0 ) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1 , src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width ; if ((0) & (1 << 2)) mask += width; vx += width * unit_x ; } if (right_tz > 0) { buf1[0] = src1[src_image->bits. width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5804 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_pad_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); 
weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5805 | scaled_bilinear_scanline_sse2_x888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5806 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_x888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5807 | PAD, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_x888_8888_pad_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * 
(mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight 
( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst , mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits. 
height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = 
src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5808 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_x888_8888_normal_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } 
y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, 
buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == 
src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
Within the expansion of the macro 'FAST_BILINEAR_MAINLOOP_COMMON':
| |||||||
5809 | scaled_bilinear_scanline_sse2_x888_8888_SRC,static void fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( 
vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, 
left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) 
{ num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5810 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) 
+ (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5811 | NORMAL, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, 0 ); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image-> bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits .width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC ( dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_x888_8888_SRC (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5812 | |||||||
5813 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
5814 | scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t * dst, | ||||||
5815 | const uint32_t * mask, | ||||||
5816 | const uint32_t * src_top, | ||||||
5817 | const uint32_t * src_bottom, | ||||||
5818 | int32_t w, | ||||||
5819 | int wt, | ||||||
5820 | int wb, | ||||||
5821 | pixman_fixed_t vx_, | ||||||
5822 | pixman_fixed_t unit_x_, | ||||||
5823 | pixman_fixed_t max_vx, | ||||||
5824 | pixman_bool_t zero_src) | ||||||
5825 | { | ||||||
5826 | intptr_t vx = vx_; | ||||||
5827 | intptr_t unit_x = unit_x_; | ||||||
5828 | BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb , wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 ( unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x ); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128 (); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1)); | ||||||
5829 | uint32_t pix1, pix2; | ||||||
5830 | |||||||
5831 | while (w && ((uintptr_t)dst & 15)) | ||||||
5832 | { | ||||||
5833 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5834 | |||||||
5835 | if (pix1) | ||||||
5836 | { | ||||||
5837 | pix2 = *dst; | ||||||
5838 | *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); | ||||||
5839 | } | ||||||
5840 | |||||||
5841 | w--; | ||||||
5842 | dst++; | ||||||
5843 | } | ||||||
5844 | |||||||
5845 | while (w >= 4) | ||||||
5846 | { | ||||||
5847 | __m128i xmm_src; | ||||||
5848 | __m128i xmm_src_hi, xmm_src_lo, xmm_dst_hi, xmm_dst_lo; | ||||||
5849 | __m128i xmm_alpha_hi, xmm_alpha_lo; | ||||||
5850 | |||||||
5851 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); 
xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1 , xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0); | ||||||
5852 | |||||||
5853 | if (!is_zero (xmm_src)) | ||||||
5854 | { | ||||||
5855 | if (is_opaque (xmm_src)) | ||||||
5856 | { | ||||||
5857 | save_128_aligned ((__m128i *)dst, xmm_src); | ||||||
5858 | } | ||||||
5859 | else | ||||||
5860 | { | ||||||
5861 | __m128i xmm_dst = load_128_aligned ((__m128i *)dst); | ||||||
5862 | |||||||
5863 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
5864 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5865 | |||||||
5866 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
5867 | over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
5868 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
5869 | |||||||
5870 | save_128_aligned ((__m128i *)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
5871 | } | ||||||
5872 | } | ||||||
5873 | |||||||
5874 | w -= 4; | ||||||
5875 | dst += 4; | ||||||
5876 | } | ||||||
5877 | |||||||
5878 | while (w) | ||||||
5879 | { | ||||||
5880 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5881 | |||||||
5882 | if (pix1) | ||||||
5883 | { | ||||||
5884 | pix2 = *dst; | ||||||
5885 | *dst = core_combine_over_u_pixel_sse2 (pix1, pix2); | ||||||
5886 | } | ||||||
5887 | |||||||
5888 | w--; | ||||||
5889 | dst++; | ||||||
5890 | } | ||||||
5891 | } | ||||||
5892 | |||||||
5893 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); 
if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), 
unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) 
num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5894 | scaled_bilinear_scanline_sse2_8888_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { 
y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); 
dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = 
width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5895 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) 
+ (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = 
(1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) 
& (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5896 | COVER, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 
<< 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & 
(1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; 
scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5897 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); 
weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, 
weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5898 | scaled_bilinear_scanline_sse2_8888_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5899 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5900 | PAD, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) 
* (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER ( dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0 ); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5901 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) 
= ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 
16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, 
weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { 
num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5902 | scaled_bilinear_scanline_sse2_8888_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 
16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, 
weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { 
num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5903 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5904 | NONE, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2 [0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, 
((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5905 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_normal_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } 
y1 = ((int) (( vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, 
buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == 
src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5906 | scaled_bilinear_scanline_sse2_8888_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = 
((uint32_t *) __bits__) + (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( 
vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, 
left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 
1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5907 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) 
+ (mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, 
weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5908 | NORMAL, FLAG_NONE)static void fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((0) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((0) & (1 << 2)) { do { uint32_t *__bits__; int __stride__ ; __bits__ = mask_image->bits.bits; __stride__ = mask_image ->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (mask_line) = ((uint32_t *) __bits__) + 
(mask_stride) * (mask_y) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image ->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride ) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (src_first_line) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t ) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16 ))) / 2; v.vector[1] = ((pixman_fixed_t) ((src_y) << 16 )) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ((1) << 16))); if (!pixman_transform_point_3d (src_image->common.transform, &v)) return; unit_x = src_image ->common.transform->matrix[0][0]; unit_y = src_image-> common.transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((0) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int) (( vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight ( vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2 ; } else { y2 = y1; weight1 = weight2 = (1 << 7) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width ; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image ->bits.width - 1]; buf2[0] = buf2[1] = src2[src_image-> bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((0) & (1 << 2)) mask += left_pad ; } if (left_tz > 0) { buf1[0] = 0; buf1[1] = src1[0]; buf2 [0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, 
weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((0) & (1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask , src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((0) & (1 << 2)) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image ->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image-> bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((0) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1[0] = buf1 [1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & ( 1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((0) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
5909 | |||||||
5910 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
5911 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t * dst, | ||||||
5912 | const uint8_t * mask, | ||||||
5913 | const uint32_t * src_top, | ||||||
5914 | const uint32_t * src_bottom, | ||||||
5915 | int32_t w, | ||||||
5916 | int wt, | ||||||
5917 | int wb, | ||||||
5918 | pixman_fixed_t vx_, | ||||||
5919 | pixman_fixed_t unit_x_, | ||||||
5920 | pixman_fixed_t max_vx, | ||||||
5921 | pixman_bool_t zero_src) | ||||||
5922 | { | ||||||
5923 | intptr_t vx = vx_; | ||||||
5924 | intptr_t unit_x = unit_x_; | ||||||
5925 | BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb , wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 ( unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x ); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128 (); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1)); | ||||||
5926 | uint32_t pix1, pix2; | ||||||
5927 | uint32_t m; | ||||||
5928 | |||||||
5929 | while (w && ((uintptr_t)dst & 15)) | ||||||
5930 | { | ||||||
5931 | uint32_t sa; | ||||||
5932 | |||||||
5933 | m = (uint32_t) *mask++; | ||||||
5934 | |||||||
5935 | if (m) | ||||||
5936 | { | ||||||
5937 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
5938 | sa = pix1 >> 24; | ||||||
5939 | |||||||
5940 | if (sa == 0xff && m == 0xff) | ||||||
5941 | { | ||||||
5942 | *dst = pix1; | ||||||
5943 | } | ||||||
5944 | else | ||||||
5945 | { | ||||||
5946 | __m128i ms, md, ma, msa; | ||||||
5947 | |||||||
5948 | pix2 = *dst; | ||||||
5949 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | ||||||
5950 | ms = unpack_32_1x128 (pix1); | ||||||
5951 | md = unpack_32_1x128 (pix2); | ||||||
5952 | |||||||
5953 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | ||||||
5954 | |||||||
5955 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | ||||||
5956 | } | ||||||
5957 | } | ||||||
5958 | else | ||||||
5959 | { | ||||||
5960 | BILINEAR_SKIP_ONE_PIXEL ()do { vx += unit_x; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); } while (0); | ||||||
5961 | } | ||||||
5962 | |||||||
5963 | w--; | ||||||
5964 | dst++; | ||||||
5965 | } | ||||||
5966 | |||||||
5967 | while (w >= 4) | ||||||
5968 | { | ||||||
5969 | __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi; | ||||||
5970 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
5971 | __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi; | ||||||
5972 | |||||||
5973 | m = *(uint32_t*)mask; | ||||||
5974 | |||||||
5975 | if (m) | ||||||
5976 | { | ||||||
5977 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); 
xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1 , xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0); | ||||||
5978 | |||||||
5979 | if (m == 0xffffffff && is_opaque (xmm_src)) | ||||||
5980 | { | ||||||
5981 | save_128_aligned ((__m128i *)dst, xmm_src); | ||||||
5982 | } | ||||||
5983 | else | ||||||
5984 | { | ||||||
5985 | xmm_dst = load_128_aligned ((__m128i *)dst); | ||||||
5986 | |||||||
5987 | xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128()); | ||||||
5988 | |||||||
5989 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
5990 | unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi); | ||||||
5991 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5992 | |||||||
5993 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi); | ||||||
5994 | expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi); | ||||||
5995 | |||||||
5996 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi, | ||||||
5997 | &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi); | ||||||
5998 | |||||||
5999 | save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
6000 | } | ||||||
6001 | } | ||||||
6002 | else | ||||||
6003 | { | ||||||
6004 | BILINEAR_SKIP_FOUR_PIXELS ()do { vx += unit_x * 4; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux4) ; } while(0); | ||||||
6005 | } | ||||||
6006 | |||||||
6007 | w -= 4; | ||||||
6008 | dst += 4; | ||||||
6009 | mask += 4; | ||||||
6010 | } | ||||||
6011 | |||||||
6012 | while (w) | ||||||
6013 | { | ||||||
6014 | uint32_t sa; | ||||||
6015 | |||||||
6016 | m = (uint32_t) *mask++; | ||||||
6017 | |||||||
6018 | if (m) | ||||||
6019 | { | ||||||
6020 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
6021 | sa = pix1 >> 24; | ||||||
6022 | |||||||
6023 | if (sa == 0xff && m == 0xff) | ||||||
6024 | { | ||||||
6025 | *dst = pix1; | ||||||
6026 | } | ||||||
6027 | else | ||||||
6028 | { | ||||||
6029 | __m128i ms, md, ma, msa; | ||||||
6030 | |||||||
6031 | pix2 = *dst; | ||||||
6032 | ma = expand_alpha_rev_1x128 (load_32_1x128 (m)); | ||||||
6033 | ms = unpack_32_1x128 (pix1); | ||||||
6034 | md = unpack_32_1x128 (pix2); | ||||||
6035 | |||||||
6036 | msa = expand_alpha_rev_1x128 (load_32_1x128 (sa)); | ||||||
6037 | |||||||
6038 | *dst = pack_1x128_32 (in_over_1x128 (&ms, &msa, &ma, &md)); | ||||||
6039 | } | ||||||
6040 | } | ||||||
6041 | else | ||||||
6042 | { | ||||||
6043 | BILINEAR_SKIP_ONE_PIXEL ()do { vx += unit_x; xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); } while (0); | ||||||
6044 | } | ||||||
6045 | |||||||
6046 | w--; | ||||||
6047 | dst++; | ||||||
6048 | } | ||||||
6049 | } | ||||||
6050 | |||||||
6051 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_cover_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); 
(mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight 
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) 
- ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) 
/ unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6052 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) 
= ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if 
(weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - 
((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / 
unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6053 | uint32_t, uint8_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 
+ 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), 
unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if 
(num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6054 | COVER, FLAG_HAVE_NON_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 
+ 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), 
unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if 
(num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6055 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_pad_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); 
(mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = 
((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6056 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = 
((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 
16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, 
buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) 
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6057 | uint32_t, uint8_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 
= pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, 
left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 
16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6058 | PAD, FLAG_HAVE_NON_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 
= pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, 
left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 
16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6059 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_none_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); 
(mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = 
((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6060 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) 
= ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) 
((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER 
(dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , 
src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6061 | uint32_t, uint8_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); 
weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, 
buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) 
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6062 | NONE, FLAG_HAVE_NON_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); 
weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, 
buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) 
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6063 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8_8888_normal_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t 
); (mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += 
mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6064 | scaled_bilinear_scanline_sse2_8888_8_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); 
(mask_line) = ((uint8_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += 
mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6065 | uint32_t, uint8_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 
16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, 
mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); 
if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6066 | NORMAL, FLAG_HAVE_NON_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint8_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint8_t solid_mask; const uint8_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 2 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 2) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t ); (mask_line) = ((uint8_t 
*) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 2) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) 
>> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 2) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, 
mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 2) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 2) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 2) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); 
if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 2) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_8_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6067 | |||||||
/*
 * scaled_bilinear_scanline_sse2_8888_n_8888_OVER: processes one run of w
 * destination pixels starting at dst.
 *
 * Each source value is produced by the BILINEAR_INTERPOLATE_* macros from the
 * two rows src_top and src_bottom: the rows are indexed with (vx >> 16), the
 * top row is multiplied by wt and the bottom row by wb, and vx is advanced by
 * unit_x after every pixel.  The interpolated value is then combined into dst
 * through in_over_1x128 / in_over_2x128 together with a mask built from the
 * top byte of *mask.
 *
 * Returns immediately, leaving dst untouched, when zero_src is set or when
 * (*mask >> 24) is 0.
 *
 * max_vx is not referenced anywhere in this body.
 *
 * NOTE(review): in this listing every macro name is followed directly by what
 * looks like its expansion (e.g. "BILINEAR_DECLARE_VARIABLESconst ...").
 * This appears to be an artifact of how the analyzer report renders macros,
 * not of the underlying source file -- confirm against pixman-sse2.c.
 */
6068 | static force_inline__inline__ __attribute__ ((__always_inline__)) void | ||||||
6069 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER (uint32_t * dst, | ||||||
6070 | const uint32_t * mask, | ||||||
6071 | const uint32_t * src_top, | ||||||
6072 | const uint32_t * src_bottom, | ||||||
6073 | int32_t w, | ||||||
6074 | int wt, | ||||||
6075 | int wb, | ||||||
6076 | pixman_fixed_t vx_, | ||||||
6077 | pixman_fixed_t unit_x_, | ||||||
6078 | pixman_fixed_t max_vx, | ||||||
6079 | pixman_bool_t zero_src) | ||||||
6080 | { | ||||||
/* Local intptr_t copies of the position and step; (vx >> 16) is used as the source index below. */
6081 | intptr_t vx = vx_; | ||||||
6082 | intptr_t unit_x = unit_x_; | ||||||
/* Declares the constant vectors (row weights, per-pixel step, zero) and the running xmm_x used by the interpolation macros. */
6083 | BILINEAR_DECLARE_VARIABLESconst __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt); const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb , wb, wb, wb, wb); const __m128i xmm_addc = _mm_set_epi16 (0, 1, 0, 1, 0, 1, 0, 1); const __m128i xmm_ux1 = _mm_set_epi16 ( unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x, unit_x, -unit_x ); const __m128i xmm_ux4 = _mm_set_epi16 (unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4, unit_x * 4, -unit_x * 4); const __m128i xmm_zero = _mm_setzero_si128 (); __m128i xmm_x = _mm_set_epi16 (vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1), vx, -(vx + 1)); | ||||||
6084 | uint32_t pix1; | ||||||
6085 | __m128i xmm_mask; | ||||||
6086 | |||||||
/* Early out: nothing is written when zero_src is set or the top byte of the mask value is 0. */
6087 | if (zero_src || (*mask >> 24) == 0) | ||||||
6088 | return; | ||||||
6089 | |||||||
/* Only the top 8 bits of *mask are used to build the mask vector. */
6090 | xmm_mask = create_mask_16_128 (*mask >> 24); | ||||||
6091 | |||||||
/* Head: one pixel at a time until dst reaches a 16-byte boundary or w runs out. */
6092 | while (w && ((uintptr_t)dst & 15)) | ||||||
6093 | { | ||||||
6094 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
/* An interpolated value of 0 leaves *dst unchanged. */
6095 | if (pix1) | ||||||
6096 | { | ||||||
6097 | uint32_t d = *dst; | ||||||
6098 | |||||||
6099 | __m128i ms = unpack_32_1x128 (pix1); | ||||||
6100 | __m128i alpha = expand_alpha_1x128 (ms); | ||||||
6101 | __m128i dest = xmm_mask; | ||||||
6102 | __m128i alpha_dst = unpack_32_1x128 (d); | ||||||
6103 | |||||||
6104 | *dst = pack_1x128_32 | ||||||
6105 | (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | ||||||
6106 | } | ||||||
6107 | |||||||
6108 | dst++; | ||||||
6109 | w--; | ||||||
6110 | } | ||||||
6111 | |||||||
/* Body: four pixels per iteration, using load_128_aligned / save_128_aligned on dst. */
6112 | while (w >= 4) | ||||||
6113 | { | ||||||
6114 | __m128i xmm_src; | ||||||
6115 | BILINEAR_INTERPOLATE_FOUR_PIXELS (xmm_src); do { __m128i xmm_pix1, xmm_pix2, xmm_pix3, xmm_pix4; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr , xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b ); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 ( xmm_b, xmm_a), xmm_wh); xmm_pix1 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom[vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix2 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); 
xmm_pix3 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); do { __m128i xmm_wh, xmm_a, xmm_b ; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix4 = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix1 = _mm_packs_epi32 (xmm_pix1 , xmm_pix2); xmm_pix3 = _mm_packs_epi32 (xmm_pix3, xmm_pix4); xmm_src = _mm_packus_epi16 (xmm_pix1, xmm_pix3); } while(0); | ||||||
6116 | |||||||
/* dst is only read and rewritten when is_zero does not report xmm_src as zero (presumably: at least one of the four pixels is non-zero -- confirm against is_zero). */
6117 | if (!is_zero (xmm_src)) | ||||||
6118 | { | ||||||
6119 | __m128i xmm_src_lo, xmm_src_hi; | ||||||
6120 | __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi; | ||||||
6121 | __m128i xmm_alpha_lo, xmm_alpha_hi; | ||||||
6122 | |||||||
6123 | xmm_dst = load_128_aligned ((__m128i*)dst); | ||||||
6124 | |||||||
6125 | unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi); | ||||||
6126 | unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); | ||||||
6127 | expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, | ||||||
6128 | &xmm_alpha_lo, &xmm_alpha_hi); | ||||||
6129 | |||||||
/* The same xmm_mask is passed for both the low and the high half. */
6130 | in_over_2x128 (&xmm_src_lo, &xmm_src_hi, | ||||||
6131 | &xmm_alpha_lo, &xmm_alpha_hi, | ||||||
6132 | &xmm_mask, &xmm_mask, | ||||||
6133 | &xmm_dst_lo, &xmm_dst_hi); | ||||||
6134 | |||||||
6135 | save_128_aligned | ||||||
6136 | ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); | ||||||
6137 | } | ||||||
6138 | |||||||
6139 | dst += 4; | ||||||
6140 | w -= 4; | ||||||
6141 | } | ||||||
6142 | |||||||
/* Tail: the remaining (fewer than four) pixels, one at a time, same per-pixel steps as the head loop. */
6143 | while (w) | ||||||
6144 | { | ||||||
6145 | BILINEAR_INTERPOLATE_ONE_PIXEL (pix1); do { __m128i xmm_pix; do { __m128i xmm_wh, xmm_a, xmm_b; __m128i tltr = _mm_loadl_epi64 ((__m128i *)&src_top[vx >> 16 ]); __m128i blbr = _mm_loadl_epi64 ((__m128i *)&src_bottom [vx >> 16]); (void)xmm_ux4; vx += unit_x; xmm_a = _mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero), xmm_wt); xmm_b = _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero), xmm_wb); xmm_a = _mm_add_epi16 (xmm_a, xmm_b); xmm_wh = _mm_add_epi16 (xmm_addc, _mm_srli_epi16 (xmm_x, 16 - 7)); xmm_x = _mm_add_epi16 (xmm_x, xmm_ux1); xmm_b = _mm_unpacklo_epi64 ( xmm_b, xmm_a); xmm_a = _mm_madd_epi16 (_mm_unpackhi_epi16 (xmm_b, xmm_a), xmm_wh); xmm_pix = _mm_srli_epi32 (xmm_a, 7 * 2); } while (0); xmm_pix = _mm_packs_epi32 (xmm_pix , xmm_pix); xmm_pix = _mm_packus_epi16 (xmm_pix, xmm_pix); pix1 = _mm_cvtsi128_si32 (xmm_pix); } while(0); | ||||||
6146 | if (pix1) | ||||||
6147 | { | ||||||
6148 | uint32_t d = *dst; | ||||||
6149 | |||||||
6150 | __m128i ms = unpack_32_1x128 (pix1); | ||||||
6151 | __m128i alpha = expand_alpha_1x128 (ms); | ||||||
6152 | __m128i dest = xmm_mask; | ||||||
6153 | __m128i alpha_dst = unpack_32_1x128 (d); | ||||||
6154 | |||||||
6155 | *dst = pack_1x128_32 | ||||||
6156 | (in_over_1x128 (&ms, &alpha, &dest, &alpha_dst)); | ||||||
6157 | } | ||||||
6158 | |||||||
6159 | dst++; | ||||||
6160 | w--; | ||||||
6161 | } | ||||||
6162 | } | ||||||
6163 | |||||||
6164 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof 
(uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = 
pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & 
((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = 
((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6165 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight 
(vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) 
- ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) 
/ unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6166 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = 
y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), 
unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if 
(num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6167 | COVER, FLAG_HAVE_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (-1 == PIXMAN_REPEAT_PAD || -1 == PIXMAN_REPEAT_NONE) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, &left_pad , &left_tz, &width, &right_tz, &right_pad); if (-1 == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (-1 == PIXMAN_REPEAT_NORMAL) { vx = v.vector[0] ; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) (( src_image->bits.width) << 16))); max_x = ((int) ((vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = 
y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (-1 == PIXMAN_REPEAT_PAD) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; repeat (PIXMAN_REPEAT_PAD , &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_PAD , &y2, src_image->bits.height); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2 [1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (-1 == PIXMAN_REPEAT_NONE) { uint32_t *src1 , *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image->bits.height ) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image-> bits.height) { weight2 = 0; y2 = src_image->bits.height - 1 ; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), 
unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (-1 == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels ; int32_t width_remain; uint32_t * src_line_top; uint32_t * src_line_bottom ; uint32_t buf1[2]; uint32_t buf2[2]; uint32_t extended_src_line0 [64*2]; uint32_t extended_src_line1[64*2]; int i, j; repeat ( PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i< src_width;) { for (j=0; j<src_image->bits.width; j++, i ++) { extended_src_line0[i] = src_line_top[j]; extended_src_line1 [i] = src_line_bottom[j]; } } src_line_top = &extended_src_line0 [0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2 [0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom [0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if 
(num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6168 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = 
((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6169 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = 
((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6170 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 
= pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, 
left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 
16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6171 | PAD, FLAG_HAVE_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 
= pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, 
left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_PAD == PIXMAN_REPEAT_NORMAL) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 
16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6172 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t 
); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } 
y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6173 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = 
((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6174 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); 
weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, 
buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) 
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6175 | NONE, FLAG_HAVE_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t *) 
__bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL) { vx = v.vector [0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) >> 16)); 
weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NONE) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2]; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image-> bits.height) { weight1 = 0; y1 = src_image->bits.height - 1 ; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, 
buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NONE == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) 
((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6176 | FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_n_8888_normal_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof 
(uint32_t ); (mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line 
+= mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6177 | scaled_bilinear_scanline_sse2_8888_n_8888_OVER,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); 
(mask_line) = ((uint32_t *) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += 
mask_stride; } y1 = ((int ) ((vy) >> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; 
scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { 
repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6178 | uint32_t, uint32_t, uint32_t,static void fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) 
>> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, 
mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); 
if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6179 | NORMAL, FLAG_HAVE_SOLID_MASK)static void fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER (pixman_implementation_t *imp, pixman_composite_info_t *info ) { __attribute__((unused)) pixman_op_t op = info->op; __attribute__ ((unused)) pixman_image_t * src_image = info->src_image; __attribute__ ((unused)) pixman_image_t * mask_image = info->mask_image; __attribute__((unused)) pixman_image_t * dest_image = info-> dest_image; __attribute__((unused)) int32_t src_x = info-> src_x; __attribute__((unused)) int32_t src_y = info->src_y ; __attribute__((unused)) int32_t mask_x = info->mask_x; __attribute__ ((unused)) int32_t mask_y = info->mask_y; __attribute__((unused )) int32_t dest_x = info->dest_x; __attribute__((unused)) int32_t dest_y = info->dest_y; __attribute__((unused)) int32_t width = info->width; __attribute__((unused)) int32_t height = info ->height; uint32_t *dst_line; uint32_t *mask_line; uint32_t *src_first_line; int y1, y2; pixman_fixed_t max_vx = 2147483647 ; pixman_vector_t v; pixman_fixed_t vx, vy; pixman_fixed_t unit_x , unit_y; int32_t left_pad, left_tz, right_tz, right_pad; uint32_t *dst; uint32_t solid_mask; const uint32_t *mask = &solid_mask ; int src_stride, mask_stride, dst_stride; int src_width; pixman_fixed_t src_width_fixed; int max_x; pixman_bool_t need_src_extension ; do { uint32_t *__bits__; int __stride__; __bits__ = dest_image ->bits.bits; __stride__ = dest_image->bits.rowstride; ( dst_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t); (dst_line) = ((uint32_t *) __bits__) + (dst_stride ) * (dest_y) + (1) * (dest_x); } while (0); if ((1 << 1 ) & (1 << 1)) { solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); mask_stride = 0; } else if ((1 << 1) & (1 << 2)) { do { uint32_t *__bits__; int __stride__; __bits__ = mask_image->bits.bits ; __stride__ = mask_image->bits.rowstride; (mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t ); (mask_line) = ((uint32_t 
*) __bits__) + (mask_stride) * (mask_y ) + (1) * (mask_x); } while (0); } do { uint32_t *__bits__; int __stride__; __bits__ = src_image->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride) = __stride__ * (int ) sizeof (uint32_t) / (int) sizeof (uint32_t); (src_first_line ) = ((uint32_t *) __bits__) + (src_stride) * (0) + (1) * (0); } while (0); v.vector[0] = ((pixman_fixed_t) ((src_x) << 16)) + (((pixman_fixed_t) ((1) << 16))) / 2; v.vector[ 1] = ((pixman_fixed_t) ((src_y) << 16)) + (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[2] = (((pixman_fixed_t) ( (1) << 16))); if (!pixman_transform_point_3d (src_image ->common.transform, &v)) return; unit_x = src_image-> common.transform->matrix[0][0]; unit_y = src_image->common .transform->matrix[1][1]; v.vector[0] -= (((pixman_fixed_t ) ((1) << 16))) / 2; v.vector[1] -= (((pixman_fixed_t) ( (1) << 16))) / 2; vy = v.vector[1]; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD || PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { bilinear_pad_repeat_get_scanline_bounds (src_image->bits .width, v.vector[0], unit_x, &left_pad, &left_tz, & width, &right_tz, &right_pad); if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD) { left_pad += left_tz; right_pad += right_tz ; left_tz = right_tz = 0; } v.vector[0] += left_pad * unit_x; } if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL) { vx = v .vector[0]; repeat (PIXMAN_REPEAT_NORMAL, &vx, ((pixman_fixed_t ) ((src_image->bits.width) << 16))); max_x = ((int) ( (vx + (width - 1) * (int64_t)unit_x) >> 16)) + 1; if (src_image ->bits.width < 64) { src_width = 0; while (src_width < 64 && src_width <= max_x) src_width += src_image-> bits.width; need_src_extension = 1; } else { src_width = src_image ->bits.width; need_src_extension = 0; } src_width_fixed = ( (pixman_fixed_t) ((src_width) << 16)); } while (--height >= 0) { int weight1, weight2; dst = dst_line; dst_line += dst_stride; vx = v.vector[0]; if ((1 << 1) & (1 << 2)) { mask = mask_line; mask_line += mask_stride; } y1 = ((int ) ((vy) 
>> 16)); weight2 = pixman_fixed_to_bilinear_weight (vy); if (weight2) { y2 = y1 + 1; weight1 = (1 << 7) - weight2; } else { y2 = y1; weight1 = weight2 = (1 << 7 ) / 2; } vy += unit_y; if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_PAD ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height ); repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height ); src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = src1[0]; buf2[0] = buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 0); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; } if (right_pad > 0) { buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; buf2[0] = buf2[1] = src2 [src_image->bits.width - 1]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 0); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NONE ) { uint32_t *src1, *src2; uint32_t buf1[2]; uint32_t buf2[2] ; if (y1 < 0) { weight1 = 0; y1 = 0; } if (y1 >= src_image ->bits.height) { weight1 = 0; y1 = src_image->bits.height - 1; } if (y2 < 0) { weight2 = 0; y2 = 0; } if (y2 >= src_image ->bits.height) { weight2 = 0; y2 = src_image->bits.height - 1; } src1 = src_first_line + src_stride * y1; src2 = src_first_line + src_stride * y2; if (left_pad > 0) { buf1[0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, 1); dst += left_pad; if ((1 << 1) & (1 << 2) ) mask += left_pad; } if (left_tz > 0) { buf1[0] = 0; buf1 [1] = src1[0]; buf2[0] = 0; buf2[1] = src2[0]; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, 
mask, buf1, buf2, left_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += left_tz; if ((1 << 1) & ( 1 << 2)) mask += left_tz; vx += left_tz * unit_x; } if ( width > 0) { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src1, src2, width, weight1, weight2, vx, unit_x, 0, 0); dst += width; if ((1 << 1) & (1 << 2) ) mask += width; vx += width * unit_x; } if (right_tz > 0) { buf1[0] = src1[src_image->bits.width - 1]; buf1[1] = 0; buf2[0] = src2[src_image->bits.width - 1]; buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_tz, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, 0, 0); dst += right_tz; if ((1 << 1) & (1 << 2)) mask += right_tz; } if (right_pad > 0) { buf1 [0] = buf1[1] = 0; buf2[0] = buf2[1] = 0; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, right_pad, weight1, weight2, 0, 0, 0 , 1); } } else if (PIXMAN_REPEAT_NORMAL == PIXMAN_REPEAT_NORMAL ) { int32_t num_pixels; int32_t width_remain; uint32_t * src_line_top ; uint32_t * src_line_bottom; uint32_t buf1[2]; uint32_t buf2 [2]; uint32_t extended_src_line0[64*2]; uint32_t extended_src_line1 [64*2]; int i, j; repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image ->bits.height); repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image ->bits.height); src_line_top = src_first_line + src_stride * y1; src_line_bottom = src_first_line + src_stride * y2; if (need_src_extension) { for (i=0; i<src_width;) { for (j=0 ; j<src_image->bits.width; j++, i++) { extended_src_line0 [i] = src_line_top[j]; extended_src_line1[i] = src_line_bottom [j]; } } src_line_top = &extended_src_line0[0]; src_line_bottom = &extended_src_line1[0]; } buf1[0] = src_line_top[src_width - 1]; buf1[1] = src_line_top[0]; buf2[0] = src_line_bottom[src_width - 1]; buf2[1] = src_line_bottom[0]; width_remain = width; while (width_remain > 0) { repeat (PIXMAN_REPEAT_NORMAL, &vx , src_width_fixed); 
if (((int) ((vx) >> 16)) == src_width - 1) { num_pixels = ((src_width_fixed - vx - ((pixman_fixed_t ) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, buf1, buf2, num_pixels, weight1, weight2, ((vx) & ((((pixman_fixed_t) ((1) << 16))) - ((pixman_fixed_t) 1 ))), unit_x, src_width_fixed, 0); width_remain -= num_pixels; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; repeat (PIXMAN_REPEAT_NORMAL , &vx, src_width_fixed); } if (((int) ((vx) >> 16)) != src_width - 1 && width_remain > 0) { num_pixels = ((src_width_fixed - (((pixman_fixed_t) ((1) << 16))) - vx - ((pixman_fixed_t) 1)) / unit_x) + 1; if (num_pixels > width_remain) num_pixels = width_remain; scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_line_top, src_line_bottom, num_pixels, weight1 , weight2, vx, unit_x, src_width_fixed, 0); width_remain -= num_pixels ; vx += num_pixels * unit_x; dst += num_pixels; if ((1 << 1) & (1 << 2)) mask += num_pixels; } } } else { scaled_bilinear_scanline_sse2_8888_n_8888_OVER (dst, mask, src_first_line + src_stride * y1, src_first_line + src_stride * y2, width, weight1, weight2, vx, unit_x, max_vx , 0); } } } | ||||||
6180 | |||||||
6181 | static const pixman_fast_path_t sse2_fast_paths[] = | ||||||
6182 | { | ||||||
6183 | /* PIXMAN_OP_OVER */ | ||||||
6184 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_0565 }, | ||||||
6185 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_0565 }, | ||||||
6186 | PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888 }, | ||||||
6187 | PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888 }, | ||||||
6188 | PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, ((1 << 5) | ( 1 << 1) | (1 << 6)), sse2_composite_over_n_0565 }, | ||||||
6189 | PIXMAN_STD_FAST_PATH (OVER, solid, null, b5g6r5, sse2_composite_over_n_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, ((1 << 5) | ( 1 << 1) | (1 << 6)), sse2_composite_over_n_0565 }, | ||||||
6190 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888 }, | ||||||
6191 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888 }, | ||||||
6192 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888 }, | ||||||
6193 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888 }, | ||||||
6194 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_0565 }, | ||||||
6195 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_0565 }, | ||||||
6196 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888 }, | ||||||
6197 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888 }, | ||||||
6198 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888 }, | ||||||
6199 | PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888 }, | ||||||
6200 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8r8g8b8, ( (PIXMAN_a8r8g8b8 == (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ( (0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888_8888 }, | ||||||
6201 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8_8888 }, | ||||||
6202 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8_8888 }, | ||||||
6203 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8_8888 }, | ||||||
6204 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8_8888 }, | ||||||
6205 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_8_8888 }, | ||||||
6206 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_8_8888 }, | ||||||
6207 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_8_8888 }, | ||||||
6208 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6 )) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) ))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0) ))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_8_8888 }, | ||||||
6209 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888 }, | ||||||
6210 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888 }, | ||||||
6211 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888 }, | ||||||
6212 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_x888_n_8888 }, | ||||||
6213 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888 }, | ||||||
6214 | PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888 }, | ||||||
6215 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888 }, | ||||||
6216 | PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_n_8888 }, | ||||||
6217 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 8))) , PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca }, | ||||||
6218 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 8))) , PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca }, | ||||||
6219 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 8))) , PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca }, | ||||||
6220 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 8))) , PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca }, | ||||||
6221 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 8))) , PIXMAN_r5g6b5, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_0565_ca }, | ||||||
6222 | PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 8))) , PIXMAN_b5g6r5, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_0565_ca }, | ||||||
6223 | PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((2) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888 }, | ||||||
6224 | PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((2) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888 }, | ||||||
6225 | PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((3) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888 }, | ||||||
6226 | PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((3) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888 }, | ||||||
6227 | PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((2) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((2) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, ((1 << 5) | ( 1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_0565 }, | ||||||
6228 | PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | (( 0) << 12) | ((0) << 8) | ((0) << 4) | ((0)) ), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((3) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((3) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, ((1 << 5) | ( 1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_0565 }, | ||||||
6229 | PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6230 | PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6231 | |||||||
6232 | /* PIXMAN_OP_OVER_REVERSE */ | ||||||
6233 | PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888){ PIXMAN_OP_OVER_REVERSE, (((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | (( 0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_reverse_n_8888 }, | ||||||
6234 | PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888){ PIXMAN_OP_OVER_REVERSE, (((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | (( 0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_reverse_n_8888 }, | ||||||
6235 | |||||||
6236 | /* PIXMAN_OP_ADD */ | ||||||
6237 | PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 8))) , PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8888_8888_ca }, | ||||||
6238 | PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8){ PIXMAN_OP_ADD, PIXMAN_a8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_8_8 }, | ||||||
6239 | PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888){ PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_8888_8888 }, | ||||||
6240 | PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888){ PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_8888_8888 }, | ||||||
6241 | PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8_8 }, | ||||||
6242 | PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8 }, | ||||||
6243 | PIXMAN_STD_FAST_PATH (ADD, solid, null, x8r8g8b8, sse2_composite_add_n_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8888 }, | ||||||
6244 | PIXMAN_STD_FAST_PATH (ADD, solid, null, a8r8g8b8, sse2_composite_add_n_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8888 }, | ||||||
6245 | PIXMAN_STD_FAST_PATH (ADD, solid, null, x8b8g8r8, sse2_composite_add_n_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8888 }, | ||||||
6246 | PIXMAN_STD_FAST_PATH (ADD, solid, null, a8b8g8r8, sse2_composite_add_n_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8888 }, | ||||||
6247 | PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8r8g8b8, sse2_composite_add_n_8_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8_8888 }, | ||||||
6248 | PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8r8g8b8, sse2_composite_add_n_8_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8_8888 }, | ||||||
6249 | PIXMAN_STD_FAST_PATH (ADD, solid, a8, x8b8g8r8, sse2_composite_add_n_8_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8_8888 }, | ||||||
6250 | PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8b8g8r8, sse2_composite_add_n_8_8888){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8_8888 }, | ||||||
6251 | |||||||
6252 | /* PIXMAN_OP_SRC */ | ||||||
6253 | PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }, | ||||||
6254 | PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }, | ||||||
6255 | PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }, | ||||||
6256 | PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }, | ||||||
6257 | PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565){ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_x888_0565 }, | ||||||
6258 | PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565){ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_x888_0565 }, | ||||||
6259 | PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, sse2_composite_src_x888_0565){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_x888_0565 }, | ||||||
6260 | PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, sse2_composite_src_x888_0565){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_x888_0565 }, | ||||||
6261 | PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_x888_8888 }, | ||||||
6262 | PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_src_x888_8888 }, | ||||||
6263 | PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6264 | PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6265 | PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6266 | PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6267 | PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6268 | PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6269 | PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_r5g6b5, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_r5g6b5 == ( ((0) << 24) | ((1) << 16) | ((0) << 12) | ( (0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6270 | PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_b5g6r5, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_b5g6r5 == ( ((0) << 24) | ((1) << 16) | ((0) << 12) | ( (0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( ((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5, ( (1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_copy_area }, | ||||||
6271 | |||||||
6272 | /* PIXMAN_OP_IN */ | ||||||
6273 | PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8){ PIXMAN_OP_IN, PIXMAN_a8, (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_in_8_8 }, | ||||||
6274 | PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8){ PIXMAN_OP_IN, (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_in_n_8_8 }, | ||||||
6275 | PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8){ PIXMAN_OP_IN, (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))), (((0) << 24) | ((0) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_in_n_8 }, | ||||||
6276 | |||||||
6277 | SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER, }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER , }, | ||||||
6278 | SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER, }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER , }, | ||||||
6279 | SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER, }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER , }, | ||||||
6280 | SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER, }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_normal_OVER , }, | ||||||
6281 | |||||||
6282 | SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER , }, | ||||||
6283 | SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER , }, | ||||||
6284 | SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER , }, | ||||||
6285 | SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1 << 23), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_n_8888_normal_OVER , }, | ||||||
6286 | |||||||
6287 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888){ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC, }, { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC , }, | ||||||
6288 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888){ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC, }, { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC , }, | ||||||
6289 | SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC , }, { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC, }, { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC , }, { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC , }, | ||||||
6290 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, sse2_8888_8888){ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC, }, { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC , }, | ||||||
6291 | SIMPLE_BILINEAR_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, sse2_8888_8888){ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC, }, { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC , }, { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC , }, | ||||||
6292 | SIMPLE_BILINEAR_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, sse2_8888_8888){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_SRC , }, { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_SRC, }, { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_SRC , }, { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_SRC , }, | ||||||
6293 | |||||||
6294 | SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC , }, | ||||||
6295 | SIMPLE_BILINEAR_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_x888_8888_cover_SRC , }, | ||||||
6296 | SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_x888_8888_pad_SRC , }, | ||||||
6297 | SIMPLE_BILINEAR_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_x888_8888_pad_SRC , }, | ||||||
6298 | SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, sse2_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 3) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC , }, | ||||||
6299 | SIMPLE_BILINEAR_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, sse2_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (( 1 << 15) | (1 << 3) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_x888_8888_normal_SRC , }, | ||||||
6300 | |||||||
6301 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER , }, | ||||||
6302 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER , }, | ||||||
6303 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER , }, | ||||||
6304 | SIMPLE_BILINEAR_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8 , ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((0) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((0) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , 0, PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8888_normal_OVER , }, | ||||||
6305 | |||||||
6306 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER , }, | ||||||
6307 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER , }, | ||||||
6308 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER , }, | ||||||
6309 | SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16 ) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : (( 1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), (((0) << 24) | ((1) << 16) | ( (0) << 12) | ((0) << 8) | ((0) << 4) | ((0) )), (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), (((0) << 24) | ((1) << 16) | ((0 ) << 12) | ((0) << 8) | ((0) << 4) | ((0))) , (((((0) << 24) | ((1) << 16) | ((0) << 12 ) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8 ) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4 ) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_n_8888_normal_OVER , }, | ||||||
6310 | |||||||
6311 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ( (0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER , }, | ||||||
6312 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ( (0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER , }, | ||||||
6313 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ( (0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER , }, | ||||||
6314 | SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | (1 << 24), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ( (0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_cover_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 14) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_none_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 14) | (1 << 4)) | (1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24 ) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_pad_OVER , }, { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | ( 1 << 1) | (1 << 19) | (1 << 5) | (1 << 6)) | ((1 << 15) | (1 << 3) | (1 << 4)) | ( 1 << 16)), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 
0 : ((((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | ((1) << 16) | ((0) << 12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 << 23) | (1 << 11) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_bilinear_sse2_8888_8_8888_normal_OVER , }, | ||||||
6315 | |||||||
6316 | { PIXMAN_OP_NONE }, | ||||||
6317 | }; | ||||||
6318 | |||||||
6319 | static uint32_t * | ||||||
6320 | sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask) | ||||||
6321 | { | ||||||
6322 | int w = iter->width; | ||||||
6323 | __m128i ff000000 = mask_ff000000; | ||||||
6324 | uint32_t *dst = iter->buffer; | ||||||
6325 | uint32_t *src = (uint32_t *)iter->bits; | ||||||
6326 | |||||||
6327 | iter->bits += iter->stride; | ||||||
6328 | |||||||
6329 | while (w && ((uintptr_t)dst) & 0x0f) | ||||||
6330 | { | ||||||
6331 | *dst++ = (*src++) | 0xff000000; | ||||||
6332 | w--; | ||||||
6333 | } | ||||||
6334 | |||||||
6335 | while (w >= 4) | ||||||
6336 | { | ||||||
6337 | save_128_aligned ( | ||||||
6338 | (__m128i *)dst, _mm_or_si128 ( | ||||||
6339 | load_128_unaligned ((__m128i *)src), ff000000)); | ||||||
6340 | |||||||
6341 | dst += 4; | ||||||
6342 | src += 4; | ||||||
6343 | w -= 4; | ||||||
6344 | } | ||||||
6345 | |||||||
6346 | while (w) | ||||||
6347 | { | ||||||
6348 | *dst++ = (*src++) | 0xff000000; | ||||||
6349 | w--; | ||||||
6350 | } | ||||||
6351 | |||||||
6352 | return iter->buffer; | ||||||
6353 | } | ||||||
6354 | |||||||
6355 | static uint32_t * | ||||||
6356 | sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) | ||||||
6357 | { | ||||||
6358 | int w = iter->width; | ||||||
6359 | uint32_t *dst = iter->buffer; | ||||||
6360 | uint16_t *src = (uint16_t *)iter->bits; | ||||||
6361 | __m128i ff000000 = mask_ff000000; | ||||||
6362 | |||||||
6363 | iter->bits += iter->stride; | ||||||
6364 | |||||||
6365 | while (w && ((uintptr_t)dst) & 0x0f) | ||||||
6366 | { | ||||||
6367 | uint16_t s = *src++; | ||||||
6368 | |||||||
6369 | *dst++ = convert_0565_to_8888 (s); | ||||||
6370 | w--; | ||||||
6371 | } | ||||||
6372 | |||||||
6373 | while (w >= 8) | ||||||
6374 | { | ||||||
6375 | __m128i lo, hi, s; | ||||||
6376 | |||||||
6377 | s = _mm_loadu_si128 ((__m128i *)src); | ||||||
6378 | |||||||
6379 | lo = unpack_565_to_8888 (_mm_unpacklo_epi16 (s, _mm_setzero_si128 ())); | ||||||
6380 | hi = unpack_565_to_8888 (_mm_unpackhi_epi16 (s, _mm_setzero_si128 ())); | ||||||
6381 | |||||||
6382 | save_128_aligned ((__m128i *)(dst + 0), _mm_or_si128 (lo, ff000000)); | ||||||
6383 | save_128_aligned ((__m128i *)(dst + 4), _mm_or_si128 (hi, ff000000)); | ||||||
6384 | |||||||
6385 | dst += 8; | ||||||
6386 | src += 8; | ||||||
6387 | w -= 8; | ||||||
6388 | } | ||||||
6389 | |||||||
6390 | while (w) | ||||||
6391 | { | ||||||
6392 | uint16_t s = *src++; | ||||||
6393 | |||||||
6394 | *dst++ = convert_0565_to_8888 (s); | ||||||
6395 | w--; | ||||||
6396 | } | ||||||
6397 | |||||||
6398 | return iter->buffer; | ||||||
6399 | } | ||||||
6400 | |||||||
6401 | static uint32_t * | ||||||
6402 | sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) | ||||||
6403 | { | ||||||
6404 | int w = iter->width; | ||||||
6405 | uint32_t *dst = iter->buffer; | ||||||
6406 | uint8_t *src = iter->bits; | ||||||
6407 | __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6; | ||||||
6408 | |||||||
6409 | iter->bits += iter->stride; | ||||||
6410 | |||||||
6411 | while (w && (((uintptr_t)dst) & 15)) | ||||||
6412 | { | ||||||
6413 | *dst++ = *(src++) << 24; | ||||||
6414 | w--; | ||||||
6415 | } | ||||||
6416 | |||||||
6417 | while (w >= 16) | ||||||
6418 | { | ||||||
6419 | xmm0 = _mm_loadu_si128((__m128i *)src); | ||||||
6420 | |||||||
6421 | xmm1 = _mm_unpacklo_epi8 (_mm_setzero_si128(), xmm0); | ||||||
6422 | xmm2 = _mm_unpackhi_epi8 (_mm_setzero_si128(), xmm0); | ||||||
6423 | xmm3 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm1); | ||||||
6424 | xmm4 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm1); | ||||||
6425 | xmm5 = _mm_unpacklo_epi16 (_mm_setzero_si128(), xmm2); | ||||||
6426 | xmm6 = _mm_unpackhi_epi16 (_mm_setzero_si128(), xmm2); | ||||||
6427 | |||||||
6428 | _mm_store_si128(((__m128i *)(dst + 0)), xmm3); | ||||||
6429 | _mm_store_si128(((__m128i *)(dst + 4)), xmm4); | ||||||
6430 | _mm_store_si128(((__m128i *)(dst + 8)), xmm5); | ||||||
6431 | _mm_store_si128(((__m128i *)(dst + 12)), xmm6); | ||||||
6432 | |||||||
6433 | dst += 16; | ||||||
6434 | src += 16; | ||||||
6435 | w -= 16; | ||||||
6436 | } | ||||||
6437 | |||||||
6438 | while (w) | ||||||
6439 | { | ||||||
6440 | *dst++ = *(src++) << 24; | ||||||
6441 | w--; | ||||||
6442 | } | ||||||
6443 | |||||||
6444 | return iter->buffer; | ||||||
6445 | } | ||||||
6446 | |||||||
/*
 * IMAGE_FLAGS: the flag set used by every non-terminator entry of the
 * sse2_iters table.  It is the bitwise OR of FAST_PATH_STANDARD_FLAGS,
 * FAST_PATH_ID_TRANSFORM, FAST_PATH_BITS_IMAGE and
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST.
 *
 * NOTE(review): the parenthesised shift expressions glued to each macro
 * name below appear to be the analyzer report's inline rendering of the
 * macro expansion rather than part of the source text - confirm against
 * the real pixman-sse2.c.
 */
6447 | #define IMAGE_FLAGS(((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (1 << 0) | (1 << 25) | (1 << 23)) \ | ||||||
6448 | (FAST_PATH_STANDARD_FLAGS((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | FAST_PATH_ID_TRANSFORM(1 << 0) | \ | ||||||
6449 | FAST_PATH_BITS_IMAGE(1 << 25) | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST(1 << 23)) | ||||||
6450 | |||||||
/*
 * Table of SSE2 scanline fetchers, installed as imp->iter_info by
 * _pixman_implementation_create_sse2().
 *
 * Each entry lists a pixel format, IMAGE_FLAGS, ITER_NARROW,
 * _pixman_iter_init_bits_stride, the fetch routine for that format
 * (sse2_fetch_x8r8g8b8, sse2_fetch_r5g6b5 or sse2_fetch_a8), and a NULL
 * in the final slot.  The table ends with a PIXMAN_null entry.
 *
 * NOTE(review): the meaning of each slot depends on the layout of
 * pixman_iter_info_t, which is declared outside this chunk - presumably
 * format, image flags, iterator flags, initializer, scanline getter and
 * write-back hook; verify against the struct definition.
 */
6451 | static const pixman_iter_info_t sse2_iters[] = | ||||||
6452 | { | ||||||
6453 | { PIXMAN_x8r8g8b8, IMAGE_FLAGS(((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (1 << 0) | (1 << 25) | (1 << 23)), ITER_NARROW, | ||||||
6454 | _pixman_iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL((void*)0) | ||||||
6455 | }, | ||||||
6456 | { PIXMAN_r5g6b5, IMAGE_FLAGS(((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (1 << 0) | (1 << 25) | (1 << 23)), ITER_NARROW, | ||||||
6457 | _pixman_iter_init_bits_stride, sse2_fetch_r5g6b5, NULL((void*)0) | ||||||
6458 | }, | ||||||
6459 | { PIXMAN_a8, IMAGE_FLAGS(((1 << 2) | (1 << 5) | (1 << 1) | (1 << 6)) | (1 << 0) | (1 << 25) | (1 << 23)), ITER_NARROW, | ||||||
6460 | _pixman_iter_init_bits_stride, sse2_fetch_a8, NULL((void*)0) | ||||||
6461 | }, | ||||||
/* Terminator entry. */
6462 | { PIXMAN_null(((0) << 24) | ((0) << 16) | ((0) << 12) | ( (0) << 8) | ((0) << 4) | ((0))) }, | ||||||
6463 | }; | ||||||
6464 | |||||||
6465 | #if defined(__GNUC__4) && !defined(__x86_64__1) && !defined(__amd64__1) | ||||||
6466 | __attribute__((__force_align_arg_pointer__)) | ||||||
6467 | #endif | ||||||
6468 | pixman_implementation_t * | ||||||
6469 | _pixman_implementation_create_sse2 (pixman_implementation_t *fallback) | ||||||
6470 | { | ||||||
6471 | pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths); | ||||||
6472 | |||||||
6473 | /* SSE2 constants */ | ||||||
6474 | mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000); | ||||||
6475 | mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000); | ||||||
6476 | mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0); | ||||||
6477 | mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f); | ||||||
6478 | mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000); | ||||||
6479 | mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00); | ||||||
6480 | mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8); | ||||||
6481 | mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0); | ||||||
6482 | mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000); | ||||||
6483 | mask_0080 = create_mask_16_128 (0x0080); | ||||||
6484 | mask_00ff = create_mask_16_128 (0x00ff); | ||||||
6485 | mask_0101 = create_mask_16_128 (0x0101); | ||||||
6486 | mask_ffff = create_mask_16_128 (0xffff); | ||||||
6487 | mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000); | ||||||
6488 | mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000); | ||||||
6489 | mask_565_rb = create_mask_2x32_128 (0x00f800f8, 0x00f800f8); | ||||||
6490 | mask_565_pack_multiplier = create_mask_2x32_128 (0x20000004, 0x20000004); | ||||||
6491 | |||||||
6492 | /* Set up function pointers */ | ||||||
6493 | imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u; | ||||||
6494 | imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u; | ||||||
6495 | imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u; | ||||||
6496 | imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u; | ||||||
6497 | imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u; | ||||||
6498 | imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u; | ||||||
6499 | imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u; | ||||||
6500 | imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u; | ||||||
6501 | imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u; | ||||||
6502 | imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u; | ||||||
6503 | |||||||
6504 | imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u; | ||||||
6505 | |||||||
6506 | imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca; | ||||||
6507 | imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca; | ||||||
6508 | imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca; | ||||||
6509 | imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca; | ||||||
6510 | imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca; | ||||||
6511 | imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca; | ||||||
6512 | imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca; | ||||||
6513 | imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca; | ||||||
6514 | imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca; | ||||||
6515 | imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca; | ||||||
6516 | imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca; | ||||||
6517 | |||||||
6518 | imp->blt = sse2_blt; | ||||||
6519 | imp->fill = sse2_fill; | ||||||
6520 | |||||||
6521 | imp->iter_info = sse2_iters; | ||||||
6522 | |||||||
6523 | return imp; | ||||||
6524 | } |