Bug Summary

File: pixman/pixman-sse2.c
Location: line 3751, column 6
Description: Value stored to 'w' is never read

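This is the clang static analyzer's dead-store diagnostic: the value written to 'w' at line 3751, column 6 is never read by any later statement. Line 3751 lies outside the excerpt reproduced below, so the short example that follows is only a hypothetical illustration of the pattern the checker reports, not the flagged code itself:

static void
fill_row (uint32_t *dst, uint32_t value, int w)
{
    while (w >= 4)
    {
        dst[0] = dst[1] = dst[2] = dst[3] = value;
        dst += 4;
        w -= 4;            /* still read by the tests below, so this store is live */
    }

    if (w >= 2)
    {
        dst[0] = dst[1] = value;
        dst += 2;
        w -= 2;            /* likewise live: the final 'if (w)' reads it */
    }

    if (w)
    {
        *dst = value;
        w--;               /* dead store: nothing reads 'w' after this point */
    }
}

The usual remedies are to drop the final update of the counter or to restructure the tail so the variable really is consulted again.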
Annotated Source Code

1/*
2 * Copyright © 2008 Rodrigo Kumpera
3 * Copyright © 2008 André Tupinambá
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Red Hat not be used in advertising or
10 * publicity pertaining to distribution of the software without specific,
11 * written prior permission. Red Hat makes no representations about the
12 * suitability of this software for any purpose. It is provided "as is"
13 * without express or implied warranty.
14 *
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
22 * SOFTWARE.
23 *
24 * Author: Rodrigo Kumpera (kumpera@gmail.com)
25 * André Tupinambá (andrelrt@gmail.com)
26 *
27 * Based on work by Owen Taylor and Søren Sandmann
28 */
29#ifdef HAVE_CONFIG_H
30#include <config.h>
31#endif
32
33#include <mmintrin.h>
34#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
35#include <emmintrin.h> /* for SSE2 intrinsics */
36#include "pixman-private.h"
37#include "pixman-combine32.h"
38#include "pixman-fast-path.h"
39
40#if defined(_MSC_VER) && defined(_M_AMD64)
41/* Windows 64 doesn't allow MMX to be used, so
42 * the pixman-x64-mmx-emulation.h file contains
43 * implementations of those MMX intrinsics that
44 * are used in the SSE2 implementation.
45 */
46# include "pixman-x64-mmx-emulation.h"
47#endif
48
49#ifdef USE_SSE2
50
51/* --------------------------------------------------------------------
52 * Locals
53 */
54
55static __m64 mask_x0080;
56static __m64 mask_x00ff;
57static __m64 mask_x0101;
58static __m64 mask_x_alpha;
59
60static __m64 mask_x565_rgb;
61static __m64 mask_x565_unpack;
62
63static __m128i mask_0080;
64static __m128i mask_00ff;
65static __m128i mask_0101;
66static __m128i mask_ffff;
67static __m128i mask_ff000000;
68static __m128i mask_alpha;
69
70static __m128i mask_565_r;
71static __m128i mask_565_g1, mask_565_g2;
72static __m128i mask_565_b;
73static __m128i mask_red;
74static __m128i mask_green;
75static __m128i mask_blue;
76
77static __m128i mask_565_fix_rb;
78static __m128i mask_565_fix_g;
79
80/* ----------------------------------------------------------------------
81 * SSE2 Inlines
82 */
83static force_inline __m128i
84unpack_32_1x128 (uint32_t data)
85{
86 return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
87}
88
89static force_inline void
90unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
91{
92 *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
93 *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
94}
95
96static force_inline __m128i
97unpack_565_to_8888 (__m128i lo)
98{
99 __m128i r, g, b, rb, t;
100
101 r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
102 g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
103 b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
104
105 rb = _mm_or_si128 (r, b);
106 t = _mm_and_si128 (rb, mask_565_fix_rb);
107 t = _mm_srli_epi32 (t, 5);
108 rb = _mm_or_si128 (rb, t);
109
110 t = _mm_and_si128 (g, mask_565_fix_g);
111 t = _mm_srli_epi32 (t, 6);
112 g = _mm_or_si128 (g, t);
113
114 return _mm_or_si128 (rb, g);
115}
116
117static force_inline void
118unpack_565_128_4x128 (__m128i data,
119 __m128i* data0,
120 __m128i* data1,
121 __m128i* data2,
122 __m128i* data3)
123{
124 __m128i lo, hi;
125
126 lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
127 hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
128
129 lo = unpack_565_to_8888 (lo);
130 hi = unpack_565_to_8888 (hi);
131
132 unpack_128_2x128 (lo, data0, data1);
133 unpack_128_2x128 (hi, data2, data3);
134}
135
136static force_inline uint16_t
137pack_565_32_16 (uint32_t pixel)
138{
139 return (uint16_t) (((pixel >> 8) & 0xf800) |
140 ((pixel >> 5) & 0x07e0) |
141 ((pixel >> 3) & 0x001f));
142}
143
144static force_inline __m128i
145pack_2x128_128 (__m128i lo, __m128i hi)
146{
147 return _mm_packus_epi16 (lo, hi);
148}
149
150static force_inline __m128i
151pack_565_2x128_128 (__m128i lo, __m128i hi)
152{
153 __m128i data;
154 __m128i r, g1, g2, b;
155
156 data = pack_2x128_128 (lo, hi);
157
158 r = _mm_and_si128 (data, mask_565_r);
159 g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
160 g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
161 b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
162
163 return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
164}
165
166static force_inline __m128i
167pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
168{
169 return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
170 pack_565_2x128_128 (*xmm2, *xmm3));
171}
172
173static force_inline int
174is_opaque (__m128i x)
175{
176 __m128i ffs = _mm_cmpeq_epi8 (x, x);
177
178 return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
179}
180
181static force_inline int
182is_zero (__m128i x)
183{
184 return _mm_movemask_epi8 (
185 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
186}
187
188static force_inline int
189is_transparent (__m128i x)
190{
191 return (_mm_movemask_epi8 (
192 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
193}
194
195static force_inline __m128i
196expand_pixel_32_1x128 (uint32_t data)
197{
198 return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
199}
200
201static force_inline __m128i
202expand_alpha_1x128 (__m128i data)
203{
204 return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
205 _MM_SHUFFLE (3, 3, 3, 3)),
206 _MM_SHUFFLE (3, 3, 3, 3));
207}
208
209static force_inline void
210expand_alpha_2x128 (__m128i data_lo,
211 __m128i data_hi,
212 __m128i* alpha_lo,
213 __m128i* alpha_hi)
214{
215 __m128i lo, hi;
216
217 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
218 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
219
220 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
221 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
222}
223
224static force_inline void
225expand_alpha_rev_2x128 (__m128i data_lo,
226 __m128i data_hi,
227 __m128i* alpha_lo,
228 __m128i* alpha_hi)
229{
230 __m128i lo, hi;
231
232 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
233 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
234 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
235 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
236}
237
238static force_inline void
239pix_multiply_2x128 (__m128i* data_lo,
240 __m128i* data_hi,
241 __m128i* alpha_lo,
242 __m128i* alpha_hi,
243 __m128i* ret_lo,
244 __m128i* ret_hi)
245{
246 __m128i lo, hi;
247
248 lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
249 hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
250 lo = _mm_adds_epu16 (lo, mask_0080);
251 hi = _mm_adds_epu16 (hi, mask_0080);
252 *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
253 *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
254}
255
256static force_inline void
257pix_add_multiply_2x128 (__m128i* src_lo,
258 __m128i* src_hi,
259 __m128i* alpha_dst_lo,
260 __m128i* alpha_dst_hi,
261 __m128i* dst_lo,
262 __m128i* dst_hi,
263 __m128i* alpha_src_lo,
264 __m128i* alpha_src_hi,
265 __m128i* ret_lo,
266 __m128i* ret_hi)
267{
268 __m128i t1_lo, t1_hi;
269 __m128i t2_lo, t2_hi;
270
271 pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
272 pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
273
274 *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
275 *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
276}
277
278static force_inline void
279negate_2x128 (__m128i data_lo,
280 __m128i data_hi,
281 __m128i* neg_lo,
282 __m128i* neg_hi)
283{
284 *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
285 *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
286}
287
288static force_inline void
289invert_colors_2x128 (__m128i data_lo,
290 __m128i data_hi,
291 __m128i* inv_lo,
292 __m128i* inv_hi)
293{
294 __m128i lo, hi;
295
296 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
297 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
298 *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
299 *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
300}
301
302static force_inline void
303over_2x128 (__m128i* src_lo,
304 __m128i* src_hi,
305 __m128i* alpha_lo,
306 __m128i* alpha_hi,
307 __m128i* dst_lo,
308 __m128i* dst_hi)
309{
310 __m128i t1, t2;
311
312 negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
313
314 pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
315
316 *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
317 *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
318}
319
320static force_inline void
321over_rev_non_pre_2x128 (__m128i src_lo,
322 __m128i src_hi,
323 __m128i* dst_lo,
324 __m128i* dst_hi)
325{
326 __m128i lo, hi;
327 __m128i alpha_lo, alpha_hi;
328
329 expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
330
331 lo = _mm_or_si128 (alpha_lo, mask_alpha);
332 hi = _mm_or_si128 (alpha_hi, mask_alpha);
333
334 invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
335
336 pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
337
338 over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
339}
340
341static force_inline void
342in_over_2x128 (__m128i* src_lo,
343 __m128i* src_hi,
344 __m128i* alpha_lo,
345 __m128i* alpha_hi,
346 __m128i* mask_lo,
347 __m128i* mask_hi,
348 __m128i* dst_lo,
349 __m128i* dst_hi)
350{
351 __m128i s_lo, s_hi;
352 __m128i a_lo, a_hi;
353
354 pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
355 pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
356
357 over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
358}
359
360/* load 4 pixels from a 16-byte boundary aligned address */
361static force_inline __m128i
362load_128_aligned (__m128i* src)
363{
364 return _mm_load_si128 (src);
365}
366
367/* load 4 pixels from an unaligned address */
368static force_inline __m128i
369load_128_unaligned (const __m128i* src)
370{
371 return _mm_loadu_si128 (src);
372}
373
374/* save 4 pixels using Write Combining memory on a 16-byte
375 * boundary aligned address
376 */
377static force_inline void
378save_128_write_combining (__m128i* dst,
379 __m128i data)
380{
381 _mm_stream_si128 (dst, data);
382}
383
384/* save 4 pixels on a 16-byte boundary aligned address */
385static force_inline void
386save_128_aligned (__m128i* dst,
387 __m128i data)
388{
389 _mm_store_si128 (dst, data);
390}
391
392/* save 4 pixels on an unaligned address */
393static force_inline void
394save_128_unaligned (__m128i* dst,
395 __m128i data)
396{
397 _mm_storeu_si128 (dst, data);
398}
399
400/* ------------------------------------------------------------------
401 * MMX inlines
402 */
403
404static force_inline __m64
405load_32_1x64 (uint32_t data)
406{
407 return _mm_cvtsi32_si64 (data);
408}
409
410static force_inline __m64
411unpack_32_1x64 (uint32_t data)
412{
413 return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ());
414}
415
416static force_inline __m64
417expand_alpha_1x64 (__m64 data)
418{
419 return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3));
420}
421
422static force_inline __m64
423expand_alpha_rev_1x64 (__m64 data)
424{
425 return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
426}
427
428static force_inline __m64
429expand_pixel_8_1x64 (uint8_t data)
430{
431 return _mm_shuffle_pi16 (
432 unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
433}
434
435static force_inline __m64
436pix_multiply_1x64 (__m64 data,
437 __m64 alpha)
438{
439 return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha),
440 mask_x0080),
441 mask_x0101);
442}
443
444static force_inline __m64
445pix_add_multiply_1x64 (__m64* src,
446 __m64* alpha_dst,
447 __m64* dst,
448 __m64* alpha_src)
449{
450 __m64 t1 = pix_multiply_1x64 (*src, *alpha_dst);
451 __m64 t2 = pix_multiply_1x64 (*dst, *alpha_src);
452
453 return _mm_adds_pu8 (t1, t2);
454}
455
456static force_inline __m64
457negate_1x64 (__m64 data)
458{
459 return _mm_xor_si64 (data, mask_x00ff);
460}
461
462static force_inline __m64
463invert_colors_1x64 (__m64 data)
464{
465 return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
466}
467
468static force_inline __m64
469over_1x64 (__m64 src, __m64 alpha, __m64 dst)
470{
471 return _mm_adds_pu8 (src, pix_multiply_1x64 (dst, negate_1x64 (alpha)));
472}
473
474static force_inline __m64
475in_over_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst)
476{
477 return over_1x64 (pix_multiply_1x64 (*src, *mask),
478 pix_multiply_1x64 (*alpha, *mask),
479 *dst);
480}
481
482static force_inline __m64
483over_rev_non_pre_1x64 (__m64 src, __m64 dst)
484{
485 __m64 alpha = expand_alpha_1x64 (src);
486
487 return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src),
488 _mm_or_si64 (alpha, mask_x_alpha)),
489 alpha,
490 dst);
491}
492
493static force_inline uint32_t
494pack_1x64_32 (__m64 data)
495{
496 return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ()));
497}
498
499/* Expand 16 bits positioned at @pos (0-3) of an MMX register into
500 *
501 * 00RR00GG00BB
502 *
503 * --- Expanding 565 in the low word ---
504 *
505 * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
506 * m = m & (01f0003f001f);
507 * m = m * (008404100840);
508 * m = m >> 8;
509 *
510 * Note the trick here - the top word is shifted by another nibble to
511 * avoid it bumping into the middle word
512 */
513static force_inline __m64
514expand565_16_1x64 (uint16_t pixel)
515{
516 __m64 p;
517 __m64 t1, t2;
518
519 p = _mm_cvtsi32_si64 ((uint32_t) pixel);
520
521 t1 = _mm_slli_si64 (p, 36 - 11);
522 t2 = _mm_slli_si64 (p, 16 - 5);
523
524 p = _mm_or_si64 (t1, p);
525 p = _mm_or_si64 (t2, p);
526 p = _mm_and_si64 (p, mask_x565_rgb);
527 p = _mm_mullo_pi16 (p, mask_x565_unpack);
528
529 return _mm_srli_pi16 (p, 8);
530}
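/* Editorial aside (not part of pixman-sse2.c): a plain-C sketch of the same
 * 565 -> 8888 channel expansion that the multiply trick above implements,
 * assuming the usual "replicate the top bits into the freed low bits"
 * widening.  Per 16-bit lane, multiplying by 0x0840 (red/blue) or 0x0410
 * (green) and shifting right by 8 is equivalent to the shift-and-or below.
 */
static uint32_t
expand565_16_scalar (uint16_t pixel)
{
    uint32_t r5 = (pixel >> 11) & 0x1f;
    uint32_t g6 = (pixel >>  5) & 0x3f;
    uint32_t b5 =  pixel        & 0x1f;

    uint32_t r8 = (r5 << 3) | (r5 >> 2);    /* r5 * 0x0840 >> 8 */
    uint32_t g8 = (g6 << 2) | (g6 >> 4);    /* g6 * 0x0410 >> 8 */
    uint32_t b8 = (b5 << 3) | (b5 >> 2);    /* b5 * 0x0840 >> 8 */

    /* packed form of the 00RR | 00GG | 00BB halfwords built above */
    return (r8 << 16) | (g8 << 8) | b8;
}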
531
532/* ----------------------------------------------------------------------------
533 * Compose Core transformations
534 */
535static force_inline uint32_t
536core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
537{
538 uint8_t a;
539 __m64 ms;
540
541 a = src >> 24;
542
543 if (a == 0xff)
544 {
545 return src;
546 }
547 else if (src)
548 {
549 ms = unpack_32_1x64 (src);
550 return pack_1x64_32 (
551 over_1x64 (ms, expand_alpha_1x64 (ms), unpack_32_1x64 (dst)));
552 }
553
554 return dst;
555}
556
557static force_inline uint32_t
558combine1 (const uint32_t *ps, const uint32_t *pm)
559{
560 uint32_t s = *ps;
561
562 if (pm)
563 {
564 __m64 ms, mm;
565
566 mm = unpack_32_1x64 (*pm);
567 mm = expand_alpha_1x64 (mm);
568
569 ms = unpack_32_1x64 (s);
570 ms = pix_multiply_1x64 (ms, mm);
571
572 s = pack_1x64_32 (ms);
573 }
574
575 return s;
576}
577
578static force_inline __m128i
579combine4 (const __m128i *ps, const __m128i *pm)
580{
581 __m128i xmm_src_lo, xmm_src_hi;
582 __m128i xmm_msk_lo, xmm_msk_hi;
583 __m128i s;
584
585 if (pm)
586 {
587 xmm_msk_lo = load_128_unaligned (pm);
588
589 if (is_transparent (xmm_msk_lo))
590 return _mm_setzero_si128 ();
591 }
592
593 s = load_128_unaligned (ps);
594
595 if (pm)
596 {
597 unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
598 unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
599
600 expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
601
602 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
603 &xmm_msk_lo, &xmm_msk_hi,
604 &xmm_src_lo, &xmm_src_hi);
605
606 s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
607 }
608
609 return s;
610}
611
612static force_inline void
613core_combine_over_u_sse2 (uint32_t* pd,
614 const uint32_t* ps,
615 const uint32_t* pm,
616 int w)
617{
618 uint32_t s, d;
619
620 __m128i xmm_dst_lo, xmm_dst_hi;
621 __m128i xmm_src_lo, xmm_src_hi;
622 __m128i xmm_alpha_lo, xmm_alpha_hi;
623
624 /* Align dst on a 16-byte boundary */
625 while (w && ((unsigned long)pd & 15))
626 {
627 d = *pd;
628 s = combine1 (ps, pm);
629
630 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
631 ps++;
632 if (pm)
633 pm++;
634 w--;
635 }
636
637 while (w >= 4)
638 {
639 /* I'm loading unaligned because I'm not sure about
640 * the address alignment.
641 */
642 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
643
644 if (is_opaque (xmm_src_hi))
645 {
646 save_128_aligned ((__m128i*)pd, xmm_src_hi);
647 }
648 else if (!is_zero (xmm_src_hi))
649 {
650 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
651
652 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
653 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
654
655 expand_alpha_2x128 (
656 xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
657
658 over_2x128 (&xmm_src_lo, &xmm_src_hi,
659 &xmm_alpha_lo, &xmm_alpha_hi,
660 &xmm_dst_lo, &xmm_dst_hi);
661
662 /* rebuild the 4 pixel data and save */
663 save_128_aligned ((__m128i*)pd,
664 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
665 }
666
667 w -= 4;
668 ps += 4;
669 pd += 4;
670 if (pm)
671 pm += 4;
672 }
673
674 while (w)
675 {
676 d = *pd;
677 s = combine1 (ps, pm);
678
679 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
680 ps++;
681 if (pm)
682 pm++;
683
684 w--;
685 }
686}
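/* Editorial aside (not part of pixman-sse2.c): every combine routine in this
 * file follows the three-phase shape used above -- a scalar head loop that
 * runs until pd reaches a 16-byte boundary, an SSE2 body that handles four
 * pixels per iteration with aligned stores, and a scalar tail for the last
 * 0-3 pixels.  A minimal sketch of that shape, reusing the load/store helpers
 * defined earlier; the per-pixel work is reduced to a plain copy here.
 */
static void
combine_row_skeleton (uint32_t *pd, const uint32_t *ps, int w)
{
    while (w && ((unsigned long)pd & 15))   /* head: align the destination */
    {
        *pd++ = *ps++;
        w--;
    }

    while (w >= 4)                          /* body: four pixels per pass */
    {
        save_128_aligned ((__m128i*)pd,
                          load_128_unaligned ((const __m128i*)ps));
        pd += 4;
        ps += 4;
        w -= 4;
    }

    while (w)                               /* tail: remaining pixels */
    {
        *pd++ = *ps++;
        w--;
    }
}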
687
688static force_inline void
689core_combine_over_reverse_u_sse2 (uint32_t* pd,
690 const uint32_t* ps,
691 const uint32_t* pm,
692 int w)
693{
694 uint32_t s, d;
695
696 __m128i xmm_dst_lo, xmm_dst_hi;
697 __m128i xmm_src_lo, xmm_src_hi;
698 __m128i xmm_alpha_lo, xmm_alpha_hi;
699
700 /* Align dst on a 16-byte boundary */
701 while (w &&
702 ((unsigned long)pd & 15))
703 {
704 d = *pd;
705 s = combine1 (ps, pm);
706
707 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
708 w--;
709 ps++;
710 if (pm)
711 pm++;
712 }
713
714 while (w >= 4)
715 {
716 /* I'm loading unaligned because I'm not sure
717 * about the address alignment.
718 */
719 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
720 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
721
722 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
723 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
724
725 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
726 &xmm_alpha_lo, &xmm_alpha_hi);
727
728 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
729 &xmm_alpha_lo, &xmm_alpha_hi,
730 &xmm_src_lo, &xmm_src_hi);
731
732 /* rebuild the 4 pixel data and save */
733 save_128_aligned ((__m128i*)pd,
734 pack_2x128_128 (xmm_src_lo, xmm_src_hi));
735
736 w -= 4;
737 ps += 4;
738 pd += 4;
739
740 if (pm)
741 pm += 4;
742 }
743
744 while (w)
745 {
746 d = *pd;
747 s = combine1 (ps, pm);
748
749 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
750 ps++;
751 w--;
752 if (pm)
753 pm++;
754 }
755}
756
757static force_inline uint32_t
758core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst)
759{
760 uint32_t maska = src >> 24;
761
762 if (maska == 0)
763 {
764 return 0;
765 }
766 else if (maska != 0xff)
767 {
768 return pack_1x64_32 (
769 pix_multiply_1x64 (unpack_32_1x64 (dst),
770 expand_alpha_1x64 (unpack_32_1x64 (src))));
771 }
772
773 return dst;
774}
775
776static force_inline void
777core_combine_in_u_sse2 (uint32_t* pd,
778 const uint32_t* ps,
779 const uint32_t* pm,
780 int w)
781{
782 uint32_t s, d;
783
784 __m128i xmm_src_lo, xmm_src_hi;
785 __m128i xmm_dst_lo, xmm_dst_hi;
786
787 while (w && ((unsigned long) pd & 15))
788 {
789 s = combine1 (ps, pm);
790 d = *pd;
791
792 *pd++ = core_combine_in_u_pixelsse2 (d, s);
793 w--;
794 ps++;
795 if (pm)
796 pm++;
797 }
798
799 while (w >= 4)
800 {
801 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
802 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
803
804 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
805 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
806
807 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
808 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
809 &xmm_dst_lo, &xmm_dst_hi,
810 &xmm_dst_lo, &xmm_dst_hi);
811
812 save_128_aligned ((__m128i*)pd,
813 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
814
815 ps += 4;
816 pd += 4;
817 w -= 4;
818 if (pm)
819 pm += 4;
820 }
821
822 while (w)
823 {
824 s = combine1 (ps, pm);
825 d = *pd;
826
827 *pd++ = core_combine_in_u_pixelsse2 (d, s);
828 w--;
829 ps++;
830 if (pm)
831 pm++;
832 }
833}
834
835static force_inline void
836core_combine_reverse_in_u_sse2 (uint32_t* pd,
837 const uint32_t* ps,
838 const uint32_t *pm,
839 int w)
840{
841 uint32_t s, d;
842
843 __m128i xmm_src_lo, xmm_src_hi;
844 __m128i xmm_dst_lo, xmm_dst_hi;
845
846 while (w && ((unsigned long) pd & 15))
847 {
848 s = combine1 (ps, pm);
849 d = *pd;
850
851 *pd++ = core_combine_in_u_pixelsse2 (s, d);
852 ps++;
853 w--;
854 if (pm)
855 pm++;
856 }
857
858 while (w >= 4)
859 {
860 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
861 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
862
863 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
864 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
865
866 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
867 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
868 &xmm_src_lo, &xmm_src_hi,
869 &xmm_dst_lo, &xmm_dst_hi);
870
871 save_128_aligned (
872 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
873
874 ps += 4;
875 pd += 4;
876 w -= 4;
877 if (pm)
878 pm += 4;
879 }
880
881 while (w)
882 {
883 s = combine1 (ps, pm);
884 d = *pd;
885
886 *pd++ = core_combine_in_u_pixelsse2 (s, d);
887 w--;
888 ps++;
889 if (pm)
890 pm++;
891 }
892}
893
894static force_inline void
895core_combine_reverse_out_u_sse2 (uint32_t* pd,
896 const uint32_t* ps,
897 const uint32_t* pm,
898 int w)
899{
900 while (w && ((unsigned long) pd & 15))
901 {
902 uint32_t s = combine1 (ps, pm);
903 uint32_t d = *pd;
904
905 *pd++ = pack_1x64_32 (
906 pix_multiply_1x64 (
907 unpack_32_1x64 (d), negate_1x64 (
908 expand_alpha_1x64 (unpack_32_1x64 (s)))));
909
910 if (pm)
911 pm++;
912 ps++;
913 w--;
914 }
915
916 while (w >= 4)
917 {
918 __m128i xmm_src_lo, xmm_src_hi;
919 __m128i xmm_dst_lo, xmm_dst_hi;
920
921 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
922 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
923
924 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
925 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
926
927 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
928 negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
929
930 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
931 &xmm_src_lo, &xmm_src_hi,
932 &xmm_dst_lo, &xmm_dst_hi);
933
934 save_128_aligned (
935 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
936
937 ps += 4;
938 pd += 4;
939 if (pm)
940 pm += 4;
941
942 w -= 4;
943 }
944
945 while (w)
946 {
947 uint32_t s = combine1 (ps, pm);
948 uint32_t d = *pd;
949
950 *pd++ = pack_1x64_32 (
951 pix_multiply_1x64 (
952 unpack_32_1x64 (d), negate_1x64 (
953 expand_alpha_1x64 (unpack_32_1x64 (s)))));
954 ps++;
955 if (pm)
956 pm++;
957 w--;
958 }
959}
960
961static force_inline void
962core_combine_out_u_sse2 (uint32_t* pd,
963 const uint32_t* ps,
964 const uint32_t* pm,
965 int w)
966{
967 while (w && ((unsigned long) pd & 15))
968 {
969 uint32_t s = combine1 (ps, pm);
970 uint32_t d = *pd;
971
972 *pd++ = pack_1x64_32 (
973 pix_multiply_1x64 (
974 unpack_32_1x64 (s), negate_1x64 (
975 expand_alpha_1x64 (unpack_32_1x64 (d)))));
976 w--;
977 ps++;
978 if (pm)
979 pm++;
980 }
981
982 while (w >= 4)
983 {
984 __m128i xmm_src_lo, xmm_src_hi;
985 __m128i xmm_dst_lo, xmm_dst_hi;
986
987 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
988 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
989
990 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
991 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
992
993 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
994 negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
995
996 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
997 &xmm_dst_lo, &xmm_dst_hi,
998 &xmm_dst_lo, &xmm_dst_hi);
999
1000 save_128_aligned (
1001 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1002
1003 ps += 4;
1004 pd += 4;
1005 w -= 4;
1006 if (pm)
1007 pm += 4;
1008 }
1009
1010 while (w)
1011 {
1012 uint32_t s = combine1 (ps, pm);
1013 uint32_t d = *pd;
1014
1015 *pd++ = pack_1x64_32 (
1016 pix_multiply_1x64 (
1017 unpack_32_1x64 (s), negate_1x64 (
1018 expand_alpha_1x64 (unpack_32_1x64 (d)))));
1019 w--;
1020 ps++;
1021 if (pm)
1022 pm++;
1023 }
1024}
1025
1026static force_inline uint32_t
1027core_combine_atop_u_pixel_sse2 (uint32_t src,
1028 uint32_t dst)
1029{
1030 __m64 s = unpack_32_1x64 (src);
1031 __m64 d = unpack_32_1x64 (dst);
1032
1033 __m64 sa = negate_1x64 (expand_alpha_1x64 (s));
1034 __m64 da = expand_alpha_1x64 (d);
1035
1036 return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
1037}
1038
1039static force_inline void
1040core_combine_atop_u_sse2 (uint32_t* pd,
1041 const uint32_t* ps,
1042 const uint32_t* pm,
1043 int w)
1044{
1045 uint32_t s, d;
1046
1047 __m128i xmm_src_lo, xmm_src_hi;
1048 __m128i xmm_dst_lo, xmm_dst_hi;
1049 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1050 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1051
1052 while (w && ((unsigned long) pd & 15))
1053 {
1054 s = combine1 (ps, pm);
1055 d = *pd;
1056
1057 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1058 w--;
1059 ps++;
1060 if (pm)
1061 pm++;
1062 }
1063
1064 while (w >= 4)
1065 {
1066 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1067 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1068
1069 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1070 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1071
1072 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1073 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1074 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1075 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1076
1077 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1078 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1079
1080 pix_add_multiply_2x128 (
1081 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1082 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1083 &xmm_dst_lo, &xmm_dst_hi);
1084
1085 save_128_aligned (
1086 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1087
1088 ps += 4;
1089 pd += 4;
1090 w -= 4;
1091 if (pm)
1092 pm += 4;
1093 }
1094
1095 while (w)
1096 {
1097 s = combine1 (ps, pm);
1098 d = *pd;
1099
1100 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1101 w--;
1102 ps++;
1103 if (pm)
1104 pm++;
1105 }
1106}
1107
1108static force_inline uint32_t
1109core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
1110 uint32_t dst)
1111{
1112 __m64 s = unpack_32_1x64 (src);
1113 __m64 d = unpack_32_1x64 (dst);
1114
1115 __m64 sa = expand_alpha_1x64 (s);
1116 __m64 da = negate_1x64 (expand_alpha_1x64 (d));
1117
1118 return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
1119}
1120
1121static force_inline void
1122core_combine_reverse_atop_u_sse2 (uint32_t* pd,
1123 const uint32_t* ps,
1124 const uint32_t* pm,
1125 int w)
1126{
1127 uint32_t s, d;
1128
1129 __m128i xmm_src_lo, xmm_src_hi;
1130 __m128i xmm_dst_lo, xmm_dst_hi;
1131 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1132 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1133
1134 while (w && ((unsigned long) pd & 15))
1135 {
1136 s = combine1 (ps, pm);
1137 d = *pd;
1138
1139 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1140 ps++;
1141 w--;
1142 if (pm)
1143 pm++;
1144 }
1145
1146 while (w >= 4)
1147 {
1148 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1149 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1150
1151 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1152 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1153
1154 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1155 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1156 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1157 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1158
1159 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1160 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1161
1162 pix_add_multiply_2x128 (
1163 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1164 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1165 &xmm_dst_lo, &xmm_dst_hi);
1166
1167 save_128_aligned (
1168 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1169
1170 ps += 4;
1171 pd += 4;
1172 w -= 4;
1173 if (pm)
1174 pm += 4;
1175 }
1176
1177 while (w)
1178 {
1179 s = combine1 (ps, pm);
1180 d = *pd;
1181
1182 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1183 ps++;
1184 w--;
1185 if (pm)
1186 pm++;
1187 }
1188}
1189
1190static force_inline uint32_t
1191core_combine_xor_u_pixel_sse2 (uint32_t src,
1192 uint32_t dst)
1193{
1194 __m64 s = unpack_32_1x64 (src);
1195 __m64 d = unpack_32_1x64 (dst);
1196
1197 __m64 neg_d = negate_1x64 (expand_alpha_1x64 (d));
1198 __m64 neg_s = negate_1x64 (expand_alpha_1x64 (s));
1199
1200 return pack_1x64_32 (pix_add_multiply_1x64 (&s, &neg_d, &d, &neg_s));
1201}
1202
1203static force_inline void
1204core_combine_xor_u_sse2 (uint32_t* dst,
1205 const uint32_t* src,
1206 const uint32_t *mask,
1207 int width)
1208{
1209 int w = width;
1210 uint32_t s, d;
1211 uint32_t* pd = dst;
1212 const uint32_t* ps = src;
1213 const uint32_t* pm = mask;
1214
1215 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
1216 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
1217 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1218 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1219
1220 while (w && ((unsigned long) pd & 15))
1221 {
1222 s = combine1 (ps, pm);
1223 d = *pd;
1224
1225 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1226 w--;
1227 ps++;
1228 if (pm)
1229 pm++;
1230 }
1231
1232 while (w >= 4)
1233 {
1234 xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
1235 xmm_dst = load_128_aligned ((__m128i*) pd);
1236
1237 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
1238 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
1239
1240 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1241 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1242 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1243 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1244
1245 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1246 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1247 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1248 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1249
1250 pix_add_multiply_2x128 (
1251 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1252 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1253 &xmm_dst_lo, &xmm_dst_hi);
1254
1255 save_128_aligned (
1256 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1257
1258 ps += 4;
1259 pd += 4;
1260 w -= 4;
1261 if (pm)
1262 pm += 4;
1263 }
1264
1265 while (w)
1266 {
1267 s = combine1 (ps, pm);
1268 d = *pd;
1269
1270 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1271 w--;
1272 ps++;
1273 if (pm)
1274 pm++;
1275 }
1276}
1277
1278static force_inline void
1279core_combine_add_u_sse2 (uint32_t* dst,
1280 const uint32_t* src,
1281 const uint32_t* mask,
1282 int width)
1283{
1284 int w = width;
1285 uint32_t s, d;
1286 uint32_t* pd = dst;
1287 const uint32_t* ps = src;
1288 const uint32_t* pm = mask;
1289
1290 while (w && (unsigned long)pd & 15)
1291 {
1292 s = combine1 (ps, pm);
1293 d = *pd;
1294
1295 ps++;
1296 if (pm)
1297 pm++;
1298 *pd++ = _mm_cvtsi64_si32 (
1299 _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
1300 w--;
1301 }
1302
1303 while (w >= 4)
1304 {
1305 __m128i s;
1306
1307 s = combine4 ((__m128i*)ps, (__m128i*)pm);
1308
1309 save_128_aligned (
1310 (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
1311
1312 pd += 4;
1313 ps += 4;
1314 if (pm)
1315 pm += 4;
1316 w -= 4;
1317 }
1318
1319 while (w--)
1320 {
1321 s = combine1 (ps, pm);
1322 d = *pd;
1323
1324 ps++;
1325 *pd++ = _mm_cvtsi64_si32 (
1326 _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
1327 if (pm)
1328 pm++;
1329 }
1330}
1331
1332static force_inline uint32_t
1333core_combine_saturate_u_pixel_sse2 (uint32_t src,
1334 uint32_t dst)
1335{
1336 __m64 ms = unpack_32_1x64 (src);
1337 __m64 md = unpack_32_1x64 (dst);
1338 uint32_t sa = src >> 24;
1339 uint32_t da = ~dst >> 24;
1340
1341 if (sa > da)
1342 {
1343 ms = pix_multiply_1x64 (
1344 ms, expand_alpha_1x64 (unpack_32_1x64 (DIV_UN8 (da, sa) << 24)));
1345 }
1346
1347 return pack_1x64_32 (_mm_adds_pu16 (md, ms));
1348}
1349
1350static force_inline void
1351core_combine_saturate_u_sse2 (uint32_t * pd,
1352 const uint32_t *ps,
1353 const uint32_t *pm,
1354 int w)
1355{
1356 uint32_t s, d;
1357
1358 uint32_t pack_cmp;
1359 __m128i xmm_src, xmm_dst;
1360
1361 while (w && (unsigned long)pd & 15)
1362 {
1363 s = combine1 (ps, pm);
1364 d = *pd;
1365
1366 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1367 w--;
1368 ps++;
1369 if (pm)
1370 pm++;
1371 }
1372
1373 while (w >= 4)
1374 {
1375 xmm_dst = load_128_aligned ((__m128i*)pd);
1376 xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
1377
1378 pack_cmp = _mm_movemask_epi8 (
1379 _mm_cmpgt_epi32 (
1380 _mm_srli_epi32 (xmm_src, 24),
1381 _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
1382
1383 /* if some alpha src is greater than the respective ~alpha dst */
1384 if (pack_cmp)
1385 {
1386 s = combine1 (ps++, pm);
1387 d = *pd;
1388 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1389 if (pm)
1390 pm++;
1391
1392 s = combine1 (ps++, pm);
1393 d = *pd;
1394 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1395 if (pm)
1396 pm++;
1397
1398 s = combine1 (ps++, pm);
1399 d = *pd;
1400 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1401 if (pm)
1402 pm++;
1403
1404 s = combine1 (ps++, pm);
1405 d = *pd;
1406 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1407 if (pm)
1408 pm++;
1409 }
1410 else
1411 {
1412 save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
1413
1414 pd += 4;
1415 ps += 4;
1416 if (pm)
1417 pm += 4;
1418 }
1419
1420 w -= 4;
1421 }
1422
1423 while (w--)
1424 {
1425 s = combine1 (ps, pm);
1426 d = *pd;
1427
1428 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1429 ps++;
1430 if (pm)
1431 pm++;
1432 }
1433}
1434
1435static force_inline void
1436core_combine_src_ca_sse2 (uint32_t* pd,
1437 const uint32_t* ps,
1438 const uint32_t *pm,
1439 int w)
1440{
1441 uint32_t s, m;
1442
1443 __m128i xmm_src_lo, xmm_src_hi;
1444 __m128i xmm_mask_lo, xmm_mask_hi;
1445 __m128i xmm_dst_lo, xmm_dst_hi;
1446
1447 while (w && (unsigned long)pd & 15)
1448 {
1449 s = *ps++;
1450 m = *pm++;
1451 *pd++ = pack_1x64_32 (
1452 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
1453 w--;
1454 }
1455
1456 while (w >= 4)
1457 {
1458 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1459 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1460
1461 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1462 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1463
1464 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1465 &xmm_mask_lo, &xmm_mask_hi,
1466 &xmm_dst_lo, &xmm_dst_hi);
1467
1468 save_128_aligned (
1469 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1470
1471 ps += 4;
1472 pd += 4;
1473 pm += 4;
1474 w -= 4;
1475 }
1476
1477 while (w)
1478 {
1479 s = *ps++;
1480 m = *pm++;
1481 *pd++ = pack_1x64_32 (
1482 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
1483 w--;
1484 }
1485}
1486
1487static force_inline uint32_t
1488core_combine_over_ca_pixel_sse2 (uint32_t src,
1489 uint32_t mask,
1490 uint32_t dst)
1491{
1492 __m64 s = unpack_32_1x64 (src);
1493 __m64 expAlpha = expand_alpha_1x64 (s);
1494 __m64 unpk_mask = unpack_32_1x64 (mask);
1495 __m64 unpk_dst = unpack_32_1x64 (dst);
1496
1497 return pack_1x64_32 (in_over_1x64 (&s, &expAlpha, &unpk_mask, &unpk_dst));
1498}
1499
1500static force_inline void
1501core_combine_over_ca_sse2 (uint32_t* pd,
1502 const uint32_t* ps,
1503 const uint32_t *pm,
1504 int w)
1505{
1506 uint32_t s, m, d;
1507
1508 __m128i xmm_alpha_lo, xmm_alpha_hi;
1509 __m128i xmm_src_lo, xmm_src_hi;
1510 __m128i xmm_dst_lo, xmm_dst_hi;
1511 __m128i xmm_mask_lo, xmm_mask_hi;
1512
1513 while (w && (unsigned long)pd & 15)
1514 {
1515 s = *ps++;
1516 m = *pm++;
1517 d = *pd;
1518
1519 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1520 w--;
1521 }
1522
1523 while (w >= 4)
1524 {
1525 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1526 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1527 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1528
1529 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1530 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1531 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1532
1533 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1534 &xmm_alpha_lo, &xmm_alpha_hi);
1535
1536 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
1537 &xmm_alpha_lo, &xmm_alpha_hi,
1538 &xmm_mask_lo, &xmm_mask_hi,
1539 &xmm_dst_lo, &xmm_dst_hi);
1540
1541 save_128_aligned (
1542 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1543
1544 ps += 4;
1545 pd += 4;
1546 pm += 4;
1547 w -= 4;
1548 }
1549
1550 while (w)
1551 {
1552 s = *ps++;
1553 m = *pm++;
1554 d = *pd;
1555
1556 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1557 w--;
1558 }
1559}
1560
1561static force_inline uint32_t
1562core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
1563 uint32_t mask,
1564 uint32_t dst)
1565{
1566 __m64 d = unpack_32_1x64 (dst);
1567
1568 return pack_1x64_32 (
1569 over_1x64 (d, expand_alpha_1x64 (d),
1570 pix_multiply_1x64 (unpack_32_1x64 (src),
1571 unpack_32_1x64 (mask))));
1572}
1573
1574static force_inline void
1575core_combine_over_reverse_ca_sse2 (uint32_t* pd,
1576 const uint32_t* ps,
1577 const uint32_t *pm,
1578 int w)
1579{
1580 uint32_t s, m, d;
1581
1582 __m128i xmm_alpha_lo, xmm_alpha_hi;
1583 __m128i xmm_src_lo, xmm_src_hi;
1584 __m128i xmm_dst_lo, xmm_dst_hi;
1585 __m128i xmm_mask_lo, xmm_mask_hi;
1586
1587 while (w && (unsigned long)pd & 15)
1588 {
1589 s = *ps++;
1590 m = *pm++;
1591 d = *pd;
1592
1593 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1594 w--;
1595 }
1596
1597 while (w >= 4)
1598 {
1599 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1600 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1601 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1602
1603 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1604 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1605 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1606
1607 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1608 &xmm_alpha_lo, &xmm_alpha_hi);
1609 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1610 &xmm_mask_lo, &xmm_mask_hi,
1611 &xmm_mask_lo, &xmm_mask_hi);
1612
1613 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1614 &xmm_alpha_lo, &xmm_alpha_hi,
1615 &xmm_mask_lo, &xmm_mask_hi);
1616
1617 save_128_aligned (
1618 (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
1619
1620 ps += 4;
1621 pd += 4;
1622 pm += 4;
1623 w -= 4;
1624 }
1625
1626 while (w)
1627 {
1628 s = *ps++;
1629 m = *pm++;
1630 d = *pd;
1631
1632 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1633 w--;
1634 }
1635}
1636
1637static force_inline void
1638core_combine_in_ca_sse2 (uint32_t * pd,
1639 const uint32_t *ps,
1640 const uint32_t *pm,
1641 int w)
1642{
1643 uint32_t s, m, d;
1644
1645 __m128i xmm_alpha_lo, xmm_alpha_hi;
1646 __m128i xmm_src_lo, xmm_src_hi;
1647 __m128i xmm_dst_lo, xmm_dst_hi;
1648 __m128i xmm_mask_lo, xmm_mask_hi;
1649
1650 while (w && (unsigned long)pd & 15)
1651 {
1652 s = *ps++;
1653 m = *pm++;
1654 d = *pd;
1655
1656 *pd++ = pack_1x64_32 (
1657 pix_multiply_1x64 (
1658 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
1659 expand_alpha_1x64 (unpack_32_1x64 (d))));
1660
1661 w--;
1662 }
1663
1664 while (w >= 4)
1665 {
1666 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1667 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1668 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1669
1670 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1671 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1672 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1673
1674 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1675 &xmm_alpha_lo, &xmm_alpha_hi);
1676
1677 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1678 &xmm_mask_lo, &xmm_mask_hi,
1679 &xmm_dst_lo, &xmm_dst_hi);
1680
1681 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1682 &xmm_alpha_lo, &xmm_alpha_hi,
1683 &xmm_dst_lo, &xmm_dst_hi);
1684
1685 save_128_aligned (
1686 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1687
1688 ps += 4;
1689 pd += 4;
1690 pm += 4;
1691 w -= 4;
1692 }
1693
1694 while (w)
1695 {
1696 s = *ps++;
1697 m = *pm++;
1698 d = *pd;
1699
1700 *pd++ = pack_1x64_32 (
1701 pix_multiply_1x64 (
1702 pix_multiply_1x64 (
1703 unpack_32_1x64 (s), unpack_32_1x64 (m)),
1704 expand_alpha_1x64 (unpack_32_1x64 (d))));
1705
1706 w--;
1707 }
1708}
1709
1710static force_inline void
1711core_combine_in_reverse_ca_sse2 (uint32_t * pd,
1712 const uint32_t *ps,
1713 const uint32_t *pm,
1714 int w)
1715{
1716 uint32_t s, m, d;
1717
1718 __m128i xmm_alpha_lo, xmm_alpha_hi;
1719 __m128i xmm_src_lo, xmm_src_hi;
1720 __m128i xmm_dst_lo, xmm_dst_hi;
1721 __m128i xmm_mask_lo, xmm_mask_hi;
1722
1723 while (w && (unsigned long)pd & 15)
1724 {
1725 s = *ps++;
1726 m = *pm++;
1727 d = *pd;
1728
1729 *pd++ = pack_1x64_32 (
1730 pix_multiply_1x64 (
1731 unpack_32_1x64 (d),
1732 pix_multiply_1x64 (unpack_32_1x64 (m),
1733 expand_alpha_1x64 (unpack_32_1x64 (s)))));
1734 w--;
1735 }
1736
1737 while (w >= 4)
1738 {
1739 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1740 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1741 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1742
1743 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1744 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1745 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1746
1747 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1748 &xmm_alpha_lo, &xmm_alpha_hi);
1749 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1750 &xmm_alpha_lo, &xmm_alpha_hi,
1751 &xmm_alpha_lo, &xmm_alpha_hi);
1752
1753 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1754 &xmm_alpha_lo, &xmm_alpha_hi,
1755 &xmm_dst_lo, &xmm_dst_hi);
1756
1757 save_128_aligned (
1758 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1759
1760 ps += 4;
1761 pd += 4;
1762 pm += 4;
1763 w -= 4;
1764 }
1765
1766 while (w)
1767 {
1768 s = *ps++;
1769 m = *pm++;
1770 d = *pd;
1771
1772 *pd++ = pack_1x64_32 (
1773 pix_multiply_1x64 (
1774 unpack_32_1x64 (d),
1775 pix_multiply_1x64 (unpack_32_1x64 (m),
1776 expand_alpha_1x64 (unpack_32_1x64 (s)))));
1777 w--;
1778 }
1779}
1780
1781static force_inline void
1782core_combine_out_ca_sse2 (uint32_t * pd,
1783 const uint32_t *ps,
1784 const uint32_t *pm,
1785 int w)
1786{
1787 uint32_t s, m, d;
1788
1789 __m128i xmm_alpha_lo, xmm_alpha_hi;
1790 __m128i xmm_src_lo, xmm_src_hi;
1791 __m128i xmm_dst_lo, xmm_dst_hi;
1792 __m128i xmm_mask_lo, xmm_mask_hi;
1793
1794 while (w && (unsigned long)pd & 15)
1795 {
1796 s = *ps++;
1797 m = *pm++;
1798 d = *pd;
1799
1800 *pd++ = pack_1x64_32 (
1801 pix_multiply_1x64 (
1802 pix_multiply_1x64 (
1803 unpack_32_1x64 (s), unpack_32_1x64 (m)),
1804 negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
1805 w--;
1806 }
1807
1808 while (w >= 4)
1809 {
1810 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1811 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1812 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1813
1814 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1815 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1816 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1817
1818 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1819 &xmm_alpha_lo, &xmm_alpha_hi);
1820 negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
1821 &xmm_alpha_lo, &xmm_alpha_hi);
1822
1823 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1824 &xmm_mask_lo, &xmm_mask_hi,
1825 &xmm_dst_lo, &xmm_dst_hi);
1826 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1827 &xmm_alpha_lo, &xmm_alpha_hi,
1828 &xmm_dst_lo, &xmm_dst_hi);
1829
1830 save_128_aligned (
1831 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1832
1833 ps += 4;
1834 pd += 4;
1835 pm += 4;
1836 w -= 4;
1837 }
1838
1839 while (w)
1840 {
1841 s = *ps++;
1842 m = *pm++;
1843 d = *pd;
1844
1845 *pd++ = pack_1x64_32 (
1846 pix_multiply_1x64 (
1847 pix_multiply_1x64 (
1848 unpack_32_1x64 (s), unpack_32_1x64 (m)),
1849 negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
1850
1851 w--;
1852 }
1853}
1854
1855static force_inline void
1856core_combine_out_reverse_ca_sse2 (uint32_t * pd,
1857 const uint32_t *ps,
1858 const uint32_t *pm,
1859 int w)
1860{
1861 uint32_t s, m, d;
1862
1863 __m128i xmm_alpha_lo, xmm_alpha_hi;
1864 __m128i xmm_src_lo, xmm_src_hi;
1865 __m128i xmm_dst_lo, xmm_dst_hi;
1866 __m128i xmm_mask_lo, xmm_mask_hi;
1867
1868 while (w && (unsigned long)pd & 15)
1869 {
1870 s = *ps++;
1871 m = *pm++;
1872 d = *pd;
1873
1874 *pd++ = pack_1x64_32 (
1875 pix_multiply_1x64 (
1876 unpack_32_1x64 (d),
1877 negate_1x64 (pix_multiply_1x64 (
1878 unpack_32_1x64 (m),
1879 expand_alpha_1x64 (unpack_32_1x64 (s))))));
1880 w--;
1881 }
1882
1883 while (w >= 4)
1884 {
1885 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1886 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1887 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1888
1889 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1890 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1891 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1892
1893 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1894 &xmm_alpha_lo, &xmm_alpha_hi);
1895
1896 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1897 &xmm_alpha_lo, &xmm_alpha_hi,
1898 &xmm_mask_lo, &xmm_mask_hi);
1899
1900 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
1901 &xmm_mask_lo, &xmm_mask_hi);
1902
1903 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1904 &xmm_mask_lo, &xmm_mask_hi,
1905 &xmm_dst_lo, &xmm_dst_hi);
1906
1907 save_128_aligned (
1908 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1909
1910 ps += 4;
1911 pd += 4;
1912 pm += 4;
1913 w -= 4;
1914 }
1915
1916 while (w)
1917 {
1918 s = *ps++;
1919 m = *pm++;
1920 d = *pd;
1921
1922 *pd++ = pack_1x64_32 (
1923 pix_multiply_1x64 (
1924 unpack_32_1x64 (d),
1925 negate_1x64 (pix_multiply_1x64 (
1926 unpack_32_1x64 (m),
1927 expand_alpha_1x64 (unpack_32_1x64 (s))))));
1928 w--;
1929 }
1930}
1931
1932static force_inline uint32_t
1933core_combine_atop_ca_pixel_sse2 (uint32_t src,
1934 uint32_t mask,
1935 uint32_t dst)
1936{
1937 __m64 m = unpack_32_1x64 (mask);
1938 __m64 s = unpack_32_1x64 (src);
1939 __m64 d = unpack_32_1x64 (dst);
1940 __m64 sa = expand_alpha_1x64 (s);
1941 __m64 da = expand_alpha_1x64 (d);
1942
1943 s = pix_multiply_1x64 (s, m);
1944 m = negate_1x64 (pix_multiply_1x64 (m, sa));
1945
1946 return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
1947}
1948
1949static force_inline void
1950core_combine_atop_ca_sse2 (uint32_t * pd,
1951 const uint32_t *ps,
1952 const uint32_t *pm,
1953 int w)
1954{
1955 uint32_t s, m, d;
1956
1957 __m128i xmm_src_lo, xmm_src_hi;
1958 __m128i xmm_dst_lo, xmm_dst_hi;
1959 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1960 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1961 __m128i xmm_mask_lo, xmm_mask_hi;
1962
1963 while (w && (unsigned long)pd & 15)
1964 {
1965 s = *ps++;
1966 m = *pm++;
1967 d = *pd;
1968
1969 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
1970 w--;
1971 }
1972
1973 while (w >= 4)
1974 {
1975 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1976 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1977 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1978
1979 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1980 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1981 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1982
1983 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1984 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1985 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1986 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1987
1988 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1989 &xmm_mask_lo, &xmm_mask_hi,
1990 &xmm_src_lo, &xmm_src_hi);
1991 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1992 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1993 &xmm_mask_lo, &xmm_mask_hi);
1994
1995 negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1996
1997 pix_add_multiply_2x128 (
1998 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
1999 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2000 &xmm_dst_lo, &xmm_dst_hi);
2001
2002 save_128_aligned (
2003 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2004
2005 ps += 4;
2006 pd += 4;
2007 pm += 4;
2008 w -= 4;
2009 }
2010
2011 while (w)
2012 {
2013 s = *ps++;
2014 m = *pm++;
2015 d = *pd;
2016
2017 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
2018 w--;
2019 }
2020}
2021
2022static force_inline uint32_t
2023core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
2024 uint32_t mask,
2025 uint32_t dst)
2026{
2027 __m64 m = unpack_32_1x64 (mask);
2028 __m64 s = unpack_32_1x64 (src);
2029 __m64 d = unpack_32_1x64 (dst);
2030
2031 __m64 da = negate_1x64 (expand_alpha_1x64 (d));
2032 __m64 sa = expand_alpha_1x64 (s);
2033
2034 s = pix_multiply_1x64 (s, m);
2035 m = pix_multiply_1x64 (m, sa);
2036
2037 return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
2038}
2039
2040static force_inline void
2041core_combine_reverse_atop_ca_sse2 (uint32_t * pd,
2042 const uint32_t *ps,
2043 const uint32_t *pm,
2044 int w)
2045{
2046 uint32_t s, m, d;
2047
2048 __m128i xmm_src_lo, xmm_src_hi;
2049 __m128i xmm_dst_lo, xmm_dst_hi;
2050 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2051 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2052 __m128i xmm_mask_lo, xmm_mask_hi;
2053
2054 while (w && (unsigned long)pd & 15)
2055 {
2056 s = *ps++;
2057 m = *pm++;
2058 d = *pd;
2059
2060 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2061 w--;
2062 }
2063
2064 while (w >= 4)
2065 {
2066 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2067 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2068 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2069
2070 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2071 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2072 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2073
2074 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2075 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2076 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2077 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2078
2079 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2080 &xmm_mask_lo, &xmm_mask_hi,
2081 &xmm_src_lo, &xmm_src_hi);
2082 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2083 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2084 &xmm_mask_lo, &xmm_mask_hi);
2085
2086 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2087 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2088
2089 pix_add_multiply_2x128 (
2090 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2091 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2092 &xmm_dst_lo, &xmm_dst_hi);
2093
2094 save_128_aligned (
2095 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2096
2097 ps += 4;
2098 pd += 4;
2099 pm += 4;
2100 w -= 4;
2101 }
2102
2103 while (w)
2104 {
2105 s = *ps++;
2106 m = *pm++;
2107 d = *pd;
2108
2109 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2110 w--;
2111 }
2112}
2113
2114static force_inline uint32_t
2115core_combine_xor_ca_pixel_sse2 (uint32_t src,
2116 uint32_t mask,
2117 uint32_t dst)
2118{
2119 __m64 a = unpack_32_1x64 (mask);
2120 __m64 s = unpack_32_1x64 (src);
2121 __m64 d = unpack_32_1x64 (dst);
2122
2123 __m64 alpha_dst = negate_1x64 (pix_multiply_1x64 (
2124 a, expand_alpha_1x64 (s)));
2125 __m64 dest = pix_multiply_1x64 (s, a);
2126 __m64 alpha_src = negate_1x64 (expand_alpha_1x64 (d));
2127
2128 return pack_1x64_32 (pix_add_multiply_1x64 (&d,
2129 &alpha_dst,
2130 &dest,
2131 &alpha_src));
2132}
2133
2134static force_inline void
2135core_combine_xor_ca_sse2 (uint32_t * pd,
2136 const uint32_t *ps,
2137 const uint32_t *pm,
2138 int w)
2139{
2140 uint32_t s, m, d;
2141
2142 __m128i xmm_src_lo, xmm_src_hi;
2143 __m128i xmm_dst_lo, xmm_dst_hi;
2144 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2145 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2146 __m128i xmm_mask_lo, xmm_mask_hi;
2147
2148 while (w && (unsigned long)pd & 15)
2149 {
2150 s = *ps++;
2151 m = *pm++;
2152 d = *pd;
2153
2154 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2155 w--;
2156 }
2157
2158 while (w >= 4)
2159 {
2160 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2161 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2162 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2163
2164 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2165 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2166 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2167
2168 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2169 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2170 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2171 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2172
2173 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2174 &xmm_mask_lo, &xmm_mask_hi,
2175 &xmm_src_lo, &xmm_src_hi);
2176 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2177 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2178 &xmm_mask_lo, &xmm_mask_hi);
2179
2180 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2181 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2182 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
2183 &xmm_mask_lo, &xmm_mask_hi);
2184
2185 pix_add_multiply_2x128 (
2186 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2187 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2188 &xmm_dst_lo, &xmm_dst_hi);
2189
2190 save_128_aligned (
2191 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2192
2193 ps += 4;
2194 pd += 4;
2195 pm += 4;
2196 w -= 4;
2197 }
2198
2199 while (w)
2200 {
2201 s = *ps++;
2202 m = *pm++;
2203 d = *pd;
2204
2205 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2206 w--;
2207 }
2208}
2209
2210static force_inline void
2211core_combine_add_ca_sse2 (uint32_t * pd,
2212 const uint32_t *ps,
2213 const uint32_t *pm,
2214 int w)
2215{
2216 uint32_t s, m, d;
2217
2218 __m128i xmm_src_lo, xmm_src_hi;
2219 __m128i xmm_dst_lo, xmm_dst_hi;
2220 __m128i xmm_mask_lo, xmm_mask_hi;
2221
2222 while (w && (unsigned long)pd & 15)
2223 {
2224 s = *ps++;
2225 m = *pm++;
2226 d = *pd;
2227
2228 *pd++ = pack_1x64_32 (
2229 _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
2230 unpack_32_1x64 (m)),
2231 unpack_32_1x64 (d)));
2232 w--;
2233 }
2234
2235 while (w >= 4)
2236 {
2237 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2238 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2239 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2240
2241 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2242 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2243 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2244
2245 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2246 &xmm_mask_lo, &xmm_mask_hi,
2247 &xmm_src_lo, &xmm_src_hi);
2248
2249 save_128_aligned (
2250 (__m128i*)pd, pack_2x128_128 (
2251 _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
2252 _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
2253
2254 ps += 4;
2255 pd += 4;
2256 pm += 4;
2257 w -= 4;
2258 }
2259
2260 while (w)
2261 {
2262 s = *ps++;
2263 m = *pm++;
2264 d = *pd;
2265
2266 *pd++ = pack_1x64_32 (
2267 _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
2268 unpack_32_1x64 (m)),
2269 unpack_32_1x64 (d)));
2270 w--;
2271 }
2272}
2273
2274/* ---------------------------------------------------
2275 * fb_compose_setup_SSE2
2276 */
2277static force_inline __m64
2278create_mask_16_64 (uint16_t mask)
2279{
2280 return _mm_set1_pi16 (mask);
2281}
2282
2283static force_inline __m128i
2284create_mask_16_128 (uint16_t mask)
2285{
2286 return _mm_set1_epi16 (mask);
2287}
2288
2289static force_inline __m64
2290create_mask_2x32_64 (uint32_t mask0,
2291 uint32_t mask1)
2292{
2293 return _mm_set_pi32 (mask0, mask1);
2294}
2295
2296/* Work around a code generation bug in Sun Studio 12. */
2297#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
2298# define create_mask_2x32_128(mask0, mask1) \
2299 (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
2300#else
2301static force_inline __m128i
2302create_mask_2x32_128 (uint32_t mask0,
2303 uint32_t mask1)
2304{
2305 return _mm_set_epi32 (mask0, mask1, mask0, mask1);
2306}
2307#endif
2308
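For reference, here is a minimal standalone sketch (not part of pixman-sse2.c; the helper name and the two constants are only illustrative) of the lane layout that create_mask_2x32_128 produces: with _mm_set_epi32, the second argument lands in the low 32-bit lane, which is presumably how replicated constants such as mask_ff000000 declared earlier are built.

    /* Hypothetical illustration only -- compile with -msse2. */
    #include <emmintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    static __m128i
    demo_mask_2x32_128 (uint32_t mask0, uint32_t mask1)
    {
        /* Same expression as create_mask_2x32_128 above. */
        return _mm_set_epi32 (mask0, mask1, mask0, mask1);
    }

    int
    main (void)
    {
        uint32_t out[4];

        _mm_storeu_si128 ((__m128i *) out,
                          demo_mask_2x32_128 (0xff000000, 0x00ff00ff));

        /* Prints the low lane first: 00ff00ff ff000000 00ff00ff ff000000,
         * i.e. mask1 occupies lanes 0 and 2, mask0 lanes 1 and 3. */
        printf ("%08x %08x %08x %08x\n", out[0], out[1], out[2], out[3]);
        return 0;
    }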
2309/* SSE2 code patch for fbcompose.c */
2310
2311static void
2312sse2_combine_over_u (pixman_implementation_t *imp,
2313 pixman_op_t op,
2314 uint32_t * dst,
2315 const uint32_t * src,
2316 const uint32_t * mask,
2317 int width)
2318{
2319 core_combine_over_u_sse2 (dst, src, mask, width);
2320 _mm_empty ();
2321}
2322
2323static void
2324sse2_combine_over_reverse_u (pixman_implementation_t *imp,
2325 pixman_op_t op,
2326 uint32_t * dst,
2327 const uint32_t * src,
2328 const uint32_t * mask,
2329 int width)
2330{
2331 core_combine_over_reverse_u_sse2 (dst, src, mask, width);
2332 _mm_empty ();
2333}
2334
2335static void
2336sse2_combine_in_u (pixman_implementation_t *imp,
2337 pixman_op_t op,
2338 uint32_t * dst,
2339 const uint32_t * src,
2340 const uint32_t * mask,
2341 int width)
2342{
2343 core_combine_in_u_sse2 (dst, src, mask, width);
2344 _mm_empty ();
2345}
2346
2347static void
2348sse2_combine_in_reverse_u (pixman_implementation_t *imp,
2349 pixman_op_t op,
2350 uint32_t * dst,
2351 const uint32_t * src,
2352 const uint32_t * mask,
2353 int width)
2354{
2355 core_combine_reverse_in_u_sse2 (dst, src, mask, width);
2356 _mm_empty ();
2357}
2358
2359static void
2360sse2_combine_out_u (pixman_implementation_t *imp,
2361 pixman_op_t op,
2362 uint32_t * dst,
2363 const uint32_t * src,
2364 const uint32_t * mask,
2365 int width)
2366{
2367 core_combine_out_u_sse2 (dst, src, mask, width);
2368 _mm_empty ();
2369}
2370
2371static void
2372sse2_combine_out_reverse_u (pixman_implementation_t *imp,
2373 pixman_op_t op,
2374 uint32_t * dst,
2375 const uint32_t * src,
2376 const uint32_t * mask,
2377 int width)
2378{
2379 core_combine_reverse_out_u_sse2 (dst, src, mask, width);
2380 _mm_empty ();
2381}
2382
2383static void
2384sse2_combine_atop_u (pixman_implementation_t *imp,
2385 pixman_op_t op,
2386 uint32_t * dst,
2387 const uint32_t * src,
2388 const uint32_t * mask,
2389 int width)
2390{
2391 core_combine_atop_u_sse2 (dst, src, mask, width);
2392 _mm_empty ();
2393}
2394
2395static void
2396sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
2397 pixman_op_t op,
2398 uint32_t * dst,
2399 const uint32_t * src,
2400 const uint32_t * mask,
2401 int width)
2402{
2403 core_combine_reverse_atop_u_sse2 (dst, src, mask, width);
2404 _mm_empty ();
2405}
2406
2407static void
2408sse2_combine_xor_u (pixman_implementation_t *imp,
2409 pixman_op_t op,
2410 uint32_t * dst,
2411 const uint32_t * src,
2412 const uint32_t * mask,
2413 int width)
2414{
2415 core_combine_xor_u_sse2 (dst, src, mask, width);
2416 _mm_empty ();
2417}
2418
2419static void
2420sse2_combine_add_u (pixman_implementation_t *imp,
2421 pixman_op_t op,
2422 uint32_t * dst,
2423 const uint32_t * src,
2424 const uint32_t * mask,
2425 int width)
2426{
2427 core_combine_add_u_sse2 (dst, src, mask, width);
2428 _mm_empty ();
2429}
2430
2431static void
2432sse2_combine_saturate_u (pixman_implementation_t *imp,
2433 pixman_op_t op,
2434 uint32_t * dst,
2435 const uint32_t * src,
2436 const uint32_t * mask,
2437 int width)
2438{
2439 core_combine_saturate_u_sse2 (dst, src, mask, width);
2440 _mm_empty ();
2441}
2442
2443static void
2444sse2_combine_src_ca (pixman_implementation_t *imp,
2445 pixman_op_t op,
2446 uint32_t * dst,
2447 const uint32_t * src,
2448 const uint32_t * mask,
2449 int width)
2450{
2451 core_combine_src_ca_sse2 (dst, src, mask, width);
2452 _mm_empty ();
2453}
2454
2455static void
2456sse2_combine_over_ca (pixman_implementation_t *imp,
2457 pixman_op_t op,
2458 uint32_t * dst,
2459 const uint32_t * src,
2460 const uint32_t * mask,
2461 int width)
2462{
2463 core_combine_over_ca_sse2 (dst, src, mask, width);
2464 _mm_empty ();
2465}
2466
2467static void
2468sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
2469 pixman_op_t op,
2470 uint32_t * dst,
2471 const uint32_t * src,
2472 const uint32_t * mask,
2473 int width)
2474{
2475 core_combine_over_reverse_ca_sse2 (dst, src, mask, width);
2476 _mm_empty ();
2477}
2478
2479static void
2480sse2_combine_in_ca (pixman_implementation_t *imp,
2481 pixman_op_t op,
2482 uint32_t * dst,
2483 const uint32_t * src,
2484 const uint32_t * mask,
2485 int width)
2486{
2487 core_combine_in_ca_sse2 (dst, src, mask, width);
2488 _mm_empty ();
2489}
2490
2491static void
2492sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
2493 pixman_op_t op,
2494 uint32_t * dst,
2495 const uint32_t * src,
2496 const uint32_t * mask,
2497 int width)
2498{
2499 core_combine_in_reverse_ca_sse2 (dst, src, mask, width);
2500 _mm_empty ();
2501}
2502
2503static void
2504sse2_combine_out_ca (pixman_implementation_t *imp,
2505 pixman_op_t op,
2506 uint32_t * dst,
2507 const uint32_t * src,
2508 const uint32_t * mask,
2509 int width)
2510{
2511 core_combine_out_ca_sse2 (dst, src, mask, width);
2512 _mm_empty ();
2513}
2514
2515static void
2516sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
2517 pixman_op_t op,
2518 uint32_t * dst,
2519 const uint32_t * src,
2520 const uint32_t * mask,
2521 int width)
2522{
2523 core_combine_out_reverse_ca_sse2 (dst, src, mask, width);
2524 _mm_empty ();
2525}
2526
2527static void
2528sse2_combine_atop_ca (pixman_implementation_t *imp,
2529 pixman_op_t op,
2530 uint32_t * dst,
2531 const uint32_t * src,
2532 const uint32_t * mask,
2533 int width)
2534{
2535 core_combine_atop_ca_sse2 (dst, src, mask, width);
2536 _mm_empty ();
2537}
2538
2539static void
2540sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
2541 pixman_op_t op,
2542 uint32_t * dst,
2543 const uint32_t * src,
2544 const uint32_t * mask,
2545 int width)
2546{
2547 core_combine_reverse_atop_ca_sse2 (dst, src, mask, width);
2548 _mm_empty ();
2549}
2550
2551static void
2552sse2_combine_xor_ca (pixman_implementation_t *imp,
2553 pixman_op_t op,
2554 uint32_t * dst,
2555 const uint32_t * src,
2556 const uint32_t * mask,
2557 int width)
2558{
2559 core_combine_xor_ca_sse2 (dst, src, mask, width);
2560 _mm_empty ();
2561}
2562
2563static void
2564sse2_combine_add_ca (pixman_implementation_t *imp,
2565 pixman_op_t op,
2566 uint32_t * dst,
2567 const uint32_t * src,
2568 const uint32_t * mask,
2569 int width)
2570{
2571 core_combine_add_ca_sse2 (dst, src, mask, width);
2572 _mm_empty ();
2573}
2574
2575/* -------------------------------------------------------------------
2576 * composite_over_n_8888
2577 */
2578
2579static void
2580sse2_composite_over_n_8888 (pixman_implementation_t *imp,
2581 pixman_op_t op,
2582 pixman_image_t * src_image,
2583 pixman_image_t * mask_image,
2584 pixman_image_t * dst_image,
2585 int32_t src_x,
2586 int32_t src_y,
2587 int32_t mask_x,
2588 int32_t mask_y,
2589 int32_t dest_x,
2590 int32_t dest_y,
2591 int32_t width,
2592 int32_t height)
2593{
2594 uint32_t src;
2595 uint32_t *dst_line, *dst, d;
2596 int32_t w;
2597 int dst_stride;
2598 __m128i xmm_src, xmm_alpha;
2599 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2600
2601 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2602
2603 if (src == 0)
2604 return;
2605
2606    PIXMAN_IMAGE_GET_LINE (
2607        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2608
2609 xmm_src = expand_pixel_32_1x128 (src);
2610 xmm_alpha = expand_alpha_1x128 (xmm_src);
2611
2612 while (height--)
2613 {
2614 dst = dst_line;
2615
2616 dst_line += dst_stride;
2617 w = width;
2618
2619 while (w && (unsigned long)dst & 15)
2620 {
2621 d = *dst;
2622 *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2623 _mm_movepi64_pi64 (xmm_alpha),
2624 unpack_32_1x64 (d)));
2625 w--;
2626 }
2627
2628 while (w >= 4)
2629 {
2630 xmm_dst = load_128_aligned ((__m128i*)dst);
2631
2632 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2633
2634 over_2x128 (&xmm_src, &xmm_src,
2635 &xmm_alpha, &xmm_alpha,
2636 &xmm_dst_lo, &xmm_dst_hi);
2637
2638            /* rebuild the 4 pixel data and save */
2639 save_128_aligned (
2640 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2641
2642 w -= 4;
2643 dst += 4;
2644 }
2645
2646 while (w)
2647 {
2648 d = *dst;
2649 *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2650 _mm_movepi64_pi64 (xmm_alpha),
2651 unpack_32_1x64 (d)));
2652 w--;
2653 }
2654
2655 }
2656 _mm_empty ();
2657}
2658
2659/* ---------------------------------------------------------------------
2660 * composite_over_n_0565
2661 */
2662static void
2663sse2_composite_over_n_0565 (pixman_implementation_t *imp,
2664 pixman_op_t op,
2665 pixman_image_t * src_image,
2666 pixman_image_t * mask_image,
2667 pixman_image_t * dst_image,
2668 int32_t src_x,
2669 int32_t src_y,
2670 int32_t mask_x,
2671 int32_t mask_y,
2672 int32_t dest_x,
2673 int32_t dest_y,
2674 int32_t width,
2675 int32_t height)
2676{
2677 uint32_t src;
2678 uint16_t *dst_line, *dst, d;
2679 int32_t w;
2680 int dst_stride;
2681 __m128i xmm_src, xmm_alpha;
2682 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
2683
2684 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2685
2686 if (src == 0)
2687 return;
2688
2689    PIXMAN_IMAGE_GET_LINE (
2690        dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2691
2692 xmm_src = expand_pixel_32_1x128 (src);
2693 xmm_alpha = expand_alpha_1x128 (xmm_src);
2694
2695 while (height--)
2696 {
2697 dst = dst_line;
2698
2699 dst_line += dst_stride;
2700 w = width;
2701
2702 while (w && (unsigned long)dst & 15)
2703 {
2704 d = *dst;
2705
2706 *dst++ = pack_565_32_16 (
2707 pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2708 _mm_movepi64_pi64 (xmm_alpha),
2709 expand565_16_1x64 (d))));
2710 w--;
2711 }
2712
2713 while (w >= 8)
2714 {
2715 xmm_dst = load_128_aligned ((__m128i*)dst);
2716
2717 unpack_565_128_4x128 (xmm_dst,
2718 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2719
2720 over_2x128 (&xmm_src, &xmm_src,
2721 &xmm_alpha, &xmm_alpha,
2722 &xmm_dst0, &xmm_dst1);
2723 over_2x128 (&xmm_src, &xmm_src,
2724 &xmm_alpha, &xmm_alpha,
2725 &xmm_dst2, &xmm_dst3);
2726
2727 xmm_dst = pack_565_4x128_128 (
2728 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2729
2730 save_128_aligned ((__m128i*)dst, xmm_dst);
2731
2732 dst += 8;
2733 w -= 8;
2734 }
2735
2736 while (w--)
2737 {
2738 d = *dst;
2739 *dst++ = pack_565_32_16 (
2740 pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2741 _mm_movepi64_pi64 (xmm_alpha),
2742 expand565_16_1x64 (d))));
2743 }
2744 }
2745
2746 _mm_empty ();
2747}
2748
2749/* ------------------------------
2750 * composite_add_n_8888_8888_ca
2751 */
2752static void
2753sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
2754 pixman_op_t op,
2755 pixman_image_t * src_image,
2756 pixman_image_t * mask_image,
2757 pixman_image_t * dst_image,
2758 int32_t src_x,
2759 int32_t src_y,
2760 int32_t mask_x,
2761 int32_t mask_y,
2762 int32_t dest_x,
2763 int32_t dest_y,
2764 int32_t width,
2765 int32_t height)
2766{
2767 uint32_t src, srca;
2768 uint32_t *dst_line, d;
2769 uint32_t *mask_line, m;
2770 uint32_t pack_cmp;
2771 int dst_stride, mask_stride;
2772
2773 __m128i xmm_src, xmm_alpha;
2774 __m128i xmm_dst;
2775 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2776
2777 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
2778
2779 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2780 srca = src >> 24;
2781
2782 if (src == 0)
2783 return;
2784
2785    PIXMAN_IMAGE_GET_LINE (
2786        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2787    PIXMAN_IMAGE_GET_LINE (
2788        mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2789
2790 xmm_src = _mm_unpacklo_epi8 (
2791 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2792 xmm_alpha = expand_alpha_1x128 (xmm_src);
2793 mmx_src = _mm_movepi64_pi64 (xmm_src);
2794 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
2795
2796 while (height--)
2797 {
2798 int w = width;
2799 const uint32_t *pm = (uint32_t *)mask_line;
2800 uint32_t *pd = (uint32_t *)dst_line;
2801
2802 dst_line += dst_stride;
2803 mask_line += mask_stride;
2804
2805 while (w && (unsigned long)pd & 15)
2806 {
2807 m = *pm++;
2808
2809 if (m)
2810 {
2811 d = *pd;
2812
2813 mmx_mask = unpack_32_1x64 (m);
2814 mmx_dest = unpack_32_1x64 (d);
2815
2816 *pd = pack_1x64_32 (
2817 _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
2818 }
2819
2820 pd++;
2821 w--;
2822 }
2823
2824 while (w >= 4)
2825 {
2826 xmm_mask = load_128_unaligned ((__m128i*)pm);
2827
2828 pack_cmp =
2829 _mm_movemask_epi8 (
2830 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2831
2832            /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
2833 if (pack_cmp != 0xffff)
2834 {
2835 xmm_dst = load_128_aligned ((__m128i*)pd);
2836
2837 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2838
2839 pix_multiply_2x128 (&xmm_src, &xmm_src,
2840 &xmm_mask_lo, &xmm_mask_hi,
2841 &xmm_mask_lo, &xmm_mask_hi);
2842 xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
2843
2844 save_128_aligned (
2845 (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
2846 }
2847
2848 pd += 4;
2849 pm += 4;
2850 w -= 4;
2851 }
2852
2853 while (w)
2854 {
2855 m = *pm++;
2856
2857 if (m)
2858 {
2859 d = *pd;
2860
2861 mmx_mask = unpack_32_1x64 (m);
2862 mmx_dest = unpack_32_1x64 (d);
2863
2864 *pd = pack_1x64_32 (
2865 _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
2866 }
2867
2868 pd++;
2869 w--;
2870 }
2871 }
2872
2873 _mm_empty ();
2874}
2875
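The pack_cmp test above (also used in the next function) skips the whole 4-pixel body when the loaded mask is entirely zero. Below is a minimal, self-contained sketch of that check, assuming only the SSE2 intrinsics already used in this file; it is an illustration, not part of pixman-sse2.c.

    /* Hypothetical standalone check mirroring the pack_cmp idiom:
     * movemask over a per-dword equality with zero yields 0xffff only
     * when all 16 bytes of the register are zero. Compile with -msse2. */
    #include <emmintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    static int
    all_four_pixels_zero (const uint32_t *p)
    {
        __m128i v = _mm_loadu_si128 ((const __m128i *) p);
        int pack_cmp =
            _mm_movemask_epi8 (_mm_cmpeq_epi32 (v, _mm_setzero_si128 ()));

        return pack_cmp == 0xffff;
    }

    int
    main (void)
    {
        uint32_t zero[4]  = { 0, 0, 0, 0 };
        uint32_t mixed[4] = { 0, 0x80ffffff, 0, 0 };

        /* Prints "1 0": only the first block can be skipped. */
        printf ("%d %d\n",
                all_four_pixels_zero (zero), all_four_pixels_zero (mixed));
        return 0;
    }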
2876/* ---------------------------------------------------------------------------
2877 * composite_over_n_8888_8888_ca
2878 */
2879
2880static void
2881sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
2882 pixman_op_t op,
2883 pixman_image_t * src_image,
2884 pixman_image_t * mask_image,
2885 pixman_image_t * dst_image,
2886 int32_t src_x,
2887 int32_t src_y,
2888 int32_t mask_x,
2889 int32_t mask_y,
2890 int32_t dest_x,
2891 int32_t dest_y,
2892 int32_t width,
2893 int32_t height)
2894{
2895 uint32_t src;
2896 uint32_t *dst_line, d;
2897 uint32_t *mask_line, m;
2898 uint32_t pack_cmp;
2899 int dst_stride, mask_stride;
2900
2901 __m128i xmm_src, xmm_alpha;
2902 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2903 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2904
2905 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
2906
2907 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2908
2909 if (src == 0)
2910 return;
2911
2912    PIXMAN_IMAGE_GET_LINE (
2913        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2914    PIXMAN_IMAGE_GET_LINE (
2915        mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2916
2917 xmm_src = _mm_unpacklo_epi8 (
2918 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2919 xmm_alpha = expand_alpha_1x128 (xmm_src);
2920 mmx_src = _mm_movepi64_pi64 (xmm_src);
2921 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
2922
2923 while (height--)
2924 {
2925 int w = width;
2926 const uint32_t *pm = (uint32_t *)mask_line;
2927 uint32_t *pd = (uint32_t *)dst_line;
2928
2929 dst_line += dst_stride;
2930 mask_line += mask_stride;
2931
2932 while (w && (unsigned long)pd & 15)
2933 {
2934 m = *pm++;
2935
2936 if (m)
2937 {
2938 d = *pd;
2939 mmx_mask = unpack_32_1x64 (m);
2940 mmx_dest = unpack_32_1x64 (d);
2941
2942 *pd = pack_1x64_32 (in_over_1x64 (&mmx_src,
2943 &mmx_alpha,
2944 &mmx_mask,
2945 &mmx_dest));
2946 }
2947
2948 pd++;
2949 w--;
2950 }
2951
2952 while (w >= 4)
2953 {
2954 xmm_mask = load_128_unaligned ((__m128i*)pm);
2955
2956 pack_cmp =
2957 _mm_movemask_epi8 (
2958 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2959
2960            /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
2961 if (pack_cmp != 0xffff)
2962 {
2963 xmm_dst = load_128_aligned ((__m128i*)pd);
2964
2965 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2966 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2967
2968 in_over_2x128 (&xmm_src, &xmm_src,
2969 &xmm_alpha, &xmm_alpha,
2970 &xmm_mask_lo, &xmm_mask_hi,
2971 &xmm_dst_lo, &xmm_dst_hi);
2972
2973 save_128_aligned (
2974 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2975 }
2976
2977 pd += 4;
2978 pm += 4;
2979 w -= 4;
2980 }
2981
2982 while (w)
2983 {
2984 m = *pm++;
2985
2986 if (m)
2987 {
2988 d = *pd;
2989 mmx_mask = unpack_32_1x64 (m);
2990 mmx_dest = unpack_32_1x64 (d);
2991
2992 *pd = pack_1x64_32 (
2993 in_over_1x64 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
2994 }
2995
2996 pd++;
2997 w--;
2998 }
2999 }
3000
3001 _mm_empty ();
3002}
3003
3004/*---------------------------------------------------------------------
3005 * composite_over_8888_n_8888
3006 */
3007
3008static void
3009sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
3010 pixman_op_t op,
3011 pixman_image_t * src_image,
3012 pixman_image_t * mask_image,
3013 pixman_image_t * dst_image,
3014 int32_t src_x,
3015 int32_t src_y,
3016 int32_t mask_x,
3017 int32_t mask_y,
3018 int32_t dest_x,
3019 int32_t dest_y,
3020 int32_t width,
3021 int32_t height)
3022{
3023 uint32_t *dst_line, *dst;
3024 uint32_t *src_line, *src;
3025 uint32_t mask;
3026 int32_t w;
3027 int dst_stride, src_stride;
3028
3029 __m128i xmm_mask;
3030 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3031 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3032 __m128i xmm_alpha_lo, xmm_alpha_hi;
3033
3034    PIXMAN_IMAGE_GET_LINE (
3035        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3036    PIXMAN_IMAGE_GET_LINE (
3037        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3038
3039 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
3040
3041 xmm_mask = create_mask_16_128 (mask >> 24);
3042
3043 while (height--)
3044 {
3045 dst = dst_line;
3046 dst_line += dst_stride;
3047 src = src_line;
3048 src_line += src_stride;
3049 w = width;
3050
3051 while (w && (unsigned long)dst & 15)
3052 {
3053 uint32_t s = *src++;
3054
3055 if (s)
3056 {
3057 uint32_t d = *dst;
3058
3059 __m64 ms = unpack_32_1x64 (s);
3060 __m64 alpha = expand_alpha_1x64 (ms);
3061 __m64 dest = _mm_movepi64_pi64 (xmm_mask);
3062 __m64 alpha_dst = unpack_32_1x64 (d);
3063
3064 *dst = pack_1x64_32 (
3065 in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
3066 }
3067 dst++;
3068 w--;
3069 }
3070
3071 while (w >= 4)
3072 {
3073 xmm_src = load_128_unaligned ((__m128i*)src);
3074
3075 if (!is_zero (xmm_src))
3076 {
3077 xmm_dst = load_128_aligned ((__m128i*)dst);
3078
3079 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3080 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3081 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3082 &xmm_alpha_lo, &xmm_alpha_hi);
3083
3084 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
3085 &xmm_alpha_lo, &xmm_alpha_hi,
3086 &xmm_mask, &xmm_mask,
3087 &xmm_dst_lo, &xmm_dst_hi);
3088
3089 save_128_aligned (
3090 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3091 }
3092
3093 dst += 4;
3094 src += 4;
3095 w -= 4;
3096 }
3097
3098 while (w)
3099 {
3100 uint32_t s = *src++;
3101
3102 if (s)
3103 {
3104 uint32_t d = *dst;
3105
3106 __m64 ms = unpack_32_1x64 (s);
3107 __m64 alpha = expand_alpha_1x64 (ms);
3108 __m64 mask = _mm_movepi64_pi64 (xmm_mask);
3109 __m64 dest = unpack_32_1x64 (d);
3110
3111 *dst = pack_1x64_32 (
3112 in_over_1x64 (&ms, &alpha, &mask, &dest));
3113 }
3114
3115 dst++;
3116 w--;
3117 }
3118 }
3119
3120 _mm_empty ();
3121}
3122
3123/*---------------------------------------------------------------------
3124 * composite_src_x888_8888
3125 */
3126
3127static void
3128sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
3129 pixman_op_t op,
3130 pixman_image_t * src_image,
3131 pixman_image_t * mask_image,
3132 pixman_image_t * dst_image,
3133 int32_t src_x,
3134 int32_t src_y,
3135 int32_t mask_x,
3136 int32_t mask_y,
3137 int32_t dest_x,
3138 int32_t dest_y,
3139 int32_t width,
3140 int32_t height)
3141{
3142 uint32_t *dst_line, *dst;
3143 uint32_t *src_line, *src;
3144 int32_t w;
3145 int dst_stride, src_stride;
3146
3147
3148    PIXMAN_IMAGE_GET_LINE (
3149        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3150    PIXMAN_IMAGE_GET_LINE (
3151        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3152
3153 while (height--)
3154 {
3155 dst = dst_line;
3156 dst_line += dst_stride;
3157 src = src_line;
3158 src_line += src_stride;
3159 w = width;
3160
3161 while (w && (unsigned long)dst & 15)
3162 {
3163 *dst++ = *src++ | 0xff000000;
3164 w--;
3165 }
3166
3167 while (w >= 16)
3168 {
3169 __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
3170
3171 xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
3172 xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
3173 xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
3174 xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
3175
3176 save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
3177 save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
3178 save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
3179 save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
3180
3181 dst += 16;
3182 src += 16;
3183 w -= 16;
3184 }
3185
3186 while (w)
3187 {
3188 *dst++ = *src++ | 0xff000000;
3189 w--;
3190 }
3191 }
3192
3193 _mm_empty ();
3194}
3195
3196/* ---------------------------------------------------------------------
3197 * composite_over_x888_n_8888
3198 */
3199static void
3200sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
3201 pixman_op_t op,
3202 pixman_image_t * src_image,
3203 pixman_image_t * mask_image,
3204 pixman_image_t * dst_image,
3205 int32_t src_x,
3206 int32_t src_y,
3207 int32_t mask_x,
3208 int32_t mask_y,
3209 int32_t dest_x,
3210 int32_t dest_y,
3211 int32_t width,
3212 int32_t height)
3213{
3214 uint32_t *dst_line, *dst;
3215 uint32_t *src_line, *src;
3216 uint32_t mask;
3217 int dst_stride, src_stride;
3218 int32_t w;
3219
3220 __m128i xmm_mask, xmm_alpha;
3221 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3222 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3223
3224    PIXMAN_IMAGE_GET_LINE (
3225        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3226    PIXMAN_IMAGE_GET_LINE (
3227        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3228
3229 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
3230
3231 xmm_mask = create_mask_16_128 (mask >> 24);
3232 xmm_alpha = mask_00ff;
3233
3234 while (height--)
3235 {
3236 dst = dst_line;
3237 dst_line += dst_stride;
3238 src = src_line;
3239 src_line += src_stride;
3240 w = width;
3241
3242 while (w && (unsigned long)dst & 15)
3243 {
3244 uint32_t s = (*src++) | 0xff000000;
3245 uint32_t d = *dst;
3246
3247 __m64 src = unpack_32_1x64 (s);
3248 __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
3249 __m64 mask = _mm_movepi64_pi64 (xmm_mask);
3250 __m64 dest = unpack_32_1x64 (d);
3251
3252 *dst++ = pack_1x64_32 (
3253 in_over_1x64 (&src, &alpha, &mask, &dest));
3254
3255 w--;
3256 }
3257
3258 while (w >= 4)
3259 {
3260 xmm_src = _mm_or_si128 (
3261 load_128_unaligned ((__m128i*)src), mask_ff000000);
3262 xmm_dst = load_128_aligned ((__m128i*)dst);
3263
3264 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3265 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3266
3267 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
3268 &xmm_alpha, &xmm_alpha,
3269 &xmm_mask, &xmm_mask,
3270 &xmm_dst_lo, &xmm_dst_hi);
3271
3272 save_128_aligned (
3273 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3274
3275 dst += 4;
3276 src += 4;
3277 w -= 4;
3278
3279 }
3280
3281 while (w)
3282 {
3283 uint32_t s = (*src++) | 0xff000000;
3284 uint32_t d = *dst;
3285
3286 __m64 src = unpack_32_1x64 (s);
3287 __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
3288 __m64 mask = _mm_movepi64_pi64 (xmm_mask);
3289 __m64 dest = unpack_32_1x64 (d);
3290
3291 *dst++ = pack_1x64_32 (
3292 in_over_1x64 (&src, &alpha, &mask, &dest));
3293
3294 w--;
3295 }
3296 }
3297
3298 _mm_empty ();
3299}
3300
3301/* --------------------------------------------------------------------
3302 * composite_over_8888_8888
3303 */
3304static void
3305sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
3306 pixman_op_t op,
3307 pixman_image_t * src_image,
3308 pixman_image_t * mask_image,
3309 pixman_image_t * dst_image,
3310 int32_t src_x,
3311 int32_t src_y,
3312 int32_t mask_x,
3313 int32_t mask_y,
3314 int32_t dest_x,
3315 int32_t dest_y,
3316 int32_t width,
3317 int32_t height)
3318{
3319 int dst_stride, src_stride;
3320 uint32_t *dst_line, *dst;
3321 uint32_t *src_line, *src;
3322
3323    PIXMAN_IMAGE_GET_LINE (
3324        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3325    PIXMAN_IMAGE_GET_LINE (
3326        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3327
3328 dst = dst_line;
3329 src = src_line;
3330
3331 while (height--)
3332 {
3333        core_combine_over_u_sse2 (dst, src, NULL, width);
3334
3335 dst += dst_stride;
3336 src += src_stride;
3337 }
3338 _mm_empty ();
3339}
3340
3341/* ------------------------------------------------------------------
3342 * composite_over_8888_0565
3343 */
3344static force_inline uint16_t
3345composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
3346{
3347 __m64 ms;
3348
3349 ms = unpack_32_1x64 (src);
3350 return pack_565_32_16 (
3351 pack_1x64_32 (
3352 over_1x64 (
3353 ms, expand_alpha_1x64 (ms), expand565_16_1x64 (dst))));
3354}
3355
3356static void
3357sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
3358 pixman_op_t op,
3359 pixman_image_t * src_image,
3360 pixman_image_t * mask_image,
3361 pixman_image_t * dst_image,
3362 int32_t src_x,
3363 int32_t src_y,
3364 int32_t mask_x,
3365 int32_t mask_y,
3366 int32_t dest_x,
3367 int32_t dest_y,
3368 int32_t width,
3369 int32_t height)
3370{
3371 uint16_t *dst_line, *dst, d;
3372 uint32_t *src_line, *src, s;
3373 int dst_stride, src_stride;
3374 int32_t w;
3375
3376 __m128i xmm_alpha_lo, xmm_alpha_hi;
3377 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3378 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3379
3380    PIXMAN_IMAGE_GET_LINE (
3381        dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
3382    PIXMAN_IMAGE_GET_LINE (
3383        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3384
3385#if 0
3386 /* FIXME
3387 *
3388     * I copied this code from the MMX version and kept the FIXME.
3389     * If it's a problem there, it's probably a problem here.
3390 */
3391    assert (src_image->drawable == mask_image->drawable);
3392#endif
3393
3394 while (height--)
3395 {
3396 dst = dst_line;
3397 src = src_line;
3398
3399 dst_line += dst_stride;
3400 src_line += src_stride;
3401 w = width;
3402
3403 /* Align dst on a 16-byte boundary */
3404 while (w &&
3405 ((unsigned long)dst & 15))
3406 {
3407 s = *src++;
3408 d = *dst;
3409
3410 *dst++ = composite_over_8888_0565pixel (s, d);
3411 w--;
3412 }
3413
3414        /* It's an 8-pixel loop */
3415 while (w >= 8)
3416 {
3417 /* I'm loading unaligned because I'm not sure
3418 * about the address alignment.
3419 */
3420 xmm_src = load_128_unaligned ((__m128i*) src);
3421 xmm_dst = load_128_aligned ((__m128i*) dst);
3422
3423 /* Unpacking */
3424 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3425 unpack_565_128_4x128 (xmm_dst,
3426 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3427 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3428 &xmm_alpha_lo, &xmm_alpha_hi);
3429
3430            /* Load the next 4 pixels from memory
3431             * ahead of time to optimize the memory read.
3432             */
3433 xmm_src = load_128_unaligned ((__m128i*) (src + 4));
3434
3435 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3436 &xmm_alpha_lo, &xmm_alpha_hi,
3437 &xmm_dst0, &xmm_dst1);
3438
3439 /* Unpacking */
3440 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3441 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3442 &xmm_alpha_lo, &xmm_alpha_hi);
3443
3444 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3445 &xmm_alpha_lo, &xmm_alpha_hi,
3446 &xmm_dst2, &xmm_dst3);
3447
3448 save_128_aligned (
3449 (__m128i*)dst, pack_565_4x128_128 (
3450 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3451
3452 w -= 8;
3453 dst += 8;
3454 src += 8;
3455 }
3456
3457 while (w--)
3458 {
3459 s = *src++;
3460 d = *dst;
3461
3462 *dst++ = composite_over_8888_0565pixel (s, d);
3463 }
3464 }
3465
3466 _mm_empty ();
3467}
3468
3469/* -----------------------------------------------------------------
3470 * composite_over_n_8_8888
3471 */
3472
3473static void
3474sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
3475 pixman_op_t op,
3476 pixman_image_t * src_image,
3477 pixman_image_t * mask_image,
3478 pixman_image_t * dst_image,
3479 int32_t src_x,
3480 int32_t src_y,
3481 int32_t mask_x,
3482 int32_t mask_y,
3483 int32_t dest_x,
3484 int32_t dest_y,
3485 int32_t width,
3486 int32_t height)
3487{
3488 uint32_t src, srca;
3489 uint32_t *dst_line, *dst;
3490 uint8_t *mask_line, *mask;
3491 int dst_stride, mask_stride;
3492 int32_t w;
3493 uint32_t m, d;
3494
3495 __m128i xmm_src, xmm_alpha, xmm_def;
3496 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3497 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3498
3499 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3500
3501 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
3502
3503 srca = src >> 24;
3504 if (src == 0)
3505 return;
3506
3507    PIXMAN_IMAGE_GET_LINE (
3508        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3509    PIXMAN_IMAGE_GET_LINE (
3510        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3511
3512 xmm_def = create_mask_2x32_128 (src, src);
3513 xmm_src = expand_pixel_32_1x128 (src);
3514 xmm_alpha = expand_alpha_1x128 (xmm_src);
3515 mmx_src = _mm_movepi64_pi64 (xmm_src);
3516 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
3517
3518 while (height--)
3519 {
3520 dst = dst_line;
3521 dst_line += dst_stride;
3522 mask = mask_line;
3523 mask_line += mask_stride;
3524 w = width;
3525
3526 while (w && (unsigned long)dst & 15)
3527 {
3528 uint8_t m = *mask++;
3529
3530 if (m)
3531 {
3532 d = *dst;
3533 mmx_mask = expand_pixel_8_1x64 (m);
3534 mmx_dest = unpack_32_1x64 (d);
3535
3536 *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
3537 &mmx_alpha,
3538 &mmx_mask,
3539 &mmx_dest));
3540 }
3541
3542 w--;
3543 dst++;
3544 }
3545
3546 while (w >= 4)
3547 {
3548 m = *((uint32_t*)mask);
3549
3550 if (srca == 0xff && m == 0xffffffff)
3551 {
3552 save_128_aligned ((__m128i*)dst, xmm_def);
3553 }
3554 else if (m)
3555 {
3556 xmm_dst = load_128_aligned ((__m128i*) dst);
3557 xmm_mask = unpack_32_1x128 (m);
3558 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3559
3560 /* Unpacking */
3561 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3562 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3563
3564 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3565 &xmm_mask_lo, &xmm_mask_hi);
3566
3567 in_over_2x128 (&xmm_src, &xmm_src,
3568 &xmm_alpha, &xmm_alpha,
3569 &xmm_mask_lo, &xmm_mask_hi,
3570 &xmm_dst_lo, &xmm_dst_hi);
3571
3572 save_128_aligned (
3573 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3574 }
3575
3576 w -= 4;
3577 dst += 4;
3578 mask += 4;
3579 }
3580
3581 while (w)
3582 {
3583 uint8_t m = *mask++;
3584
3585 if (m)
3586 {
3587 d = *dst;
3588 mmx_mask = expand_pixel_8_1x64 (m);
3589 mmx_dest = unpack_32_1x64 (d);
3590
3591 *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
3592 &mmx_alpha,
3593 &mmx_mask,
3594 &mmx_dest));
3595 }
3596
3597 w--;
3598 dst++;
3599 }
3600 }
3601
3602 _mm_empty ();
3603}
3604
3605/* ----------------------------------------------------------------
3606 * pixman_fill_sse2
3607 */
3608
3609pixman_bool_t
3610pixman_fill_sse2 (uint32_t *bits,
3611 int stride,
3612 int bpp,
3613 int x,
3614 int y,
3615 int width,
3616 int height,
3617 uint32_t data)
3618{
3619 uint32_t byte_width;
3620 uint8_t *byte_line;
3621
3622 __m128i xmm_def;
3623
3624 if (bpp == 8)
3625 {
3626 uint8_t b;
3627 uint16_t w;
3628
3629 stride = stride * (int) sizeof (uint32_t) / 1;
3630 byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
3631 byte_width = width;
3632 stride *= 1;
3633
3634 b = data & 0xff;
3635 w = (b << 8) | b;
3636 data = (w << 16) | w;
3637 }
3638 else if (bpp == 16)
3639 {
3640 stride = stride * (int) sizeof (uint32_t) / 2;
3641 byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
3642 byte_width = 2 * width;
3643 stride *= 2;
3644
3645 data = (data & 0xffff) * 0x00010001;
3646 }
3647 else if (bpp == 32)
3648 {
3649 stride = stride * (int) sizeof (uint32_t) / 4;
3650 byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
3651 byte_width = 4 * width;
3652 stride *= 4;
3653 }
3654 else
3655 {
3656        return FALSE;
3657 }
3658
3659 xmm_def = create_mask_2x32_128 (data, data);
3660
3661 while (height--)
3662 {
3663 int w;
3664 uint8_t *d = byte_line;
3665 byte_line += stride;
3666 w = byte_width;
3667
3668 while (w >= 1 && ((unsigned long)d & 1))
3669 {
3670 *(uint8_t *)d = data;
3671 w -= 1;
3672 d += 1;
3673 }
3674
3675 while (w >= 2 && ((unsigned long)d & 3))
3676 {
3677 *(uint16_t *)d = data;
3678 w -= 2;
3679 d += 2;
3680 }
3681
3682 while (w >= 4 && ((unsigned long)d & 15))
3683 {
3684 *(uint32_t *)d = data;
3685
3686 w -= 4;
3687 d += 4;
3688 }
3689
3690 while (w >= 128)
3691 {
3692 save_128_aligned ((__m128i*)(d), xmm_def);
3693 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3694 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3695 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3696 save_128_aligned ((__m128i*)(d + 64), xmm_def);
3697 save_128_aligned ((__m128i*)(d + 80), xmm_def);
3698 save_128_aligned ((__m128i*)(d + 96), xmm_def);
3699 save_128_aligned ((__m128i*)(d + 112), xmm_def);
3700
3701 d += 128;
3702 w -= 128;
3703 }
3704
3705 if (w >= 64)
3706 {
3707 save_128_aligned ((__m128i*)(d), xmm_def);
3708 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3709 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3710 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3711
3712 d += 64;
3713 w -= 64;
3714 }
3715
3716 if (w >= 32)
3717 {
3718 save_128_aligned ((__m128i*)(d), xmm_def);
3719 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3720
3721 d += 32;
3722 w -= 32;
3723 }
3724
3725 if (w >= 16)
3726 {
3727 save_128_aligned ((__m128i*)(d), xmm_def);
3728
3729 d += 16;
3730 w -= 16;
3731 }
3732
3733 while (w >= 4)
3734 {
3735 *(uint32_t *)d = data;
3736
3737 w -= 4;
3738 d += 4;
3739 }
3740
3741 if (w >= 2)
3742 {
3743 *(uint16_t *)d = data;
3744 w -= 2;
3745 d += 2;
3746 }
3747
3748 if (w >= 1)
3749 {
3750 *(uint8_t *)d = data;
3751 w -= 1;
Value stored to 'w' is never read
3752 d += 1;
3753 }
3754 }
3755
3756 _mm_empty ();
3757    return TRUE;
3758}
3759
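The defect flagged above is the final 'w -= 1;' in the byte-sized tail of pixman_fill_sse2 (line 3751): after the last single-byte store, w is never read again before the per-row loop re-initializes it from byte_width, so the store is dead. A minimal sketch of the tail written without the redundant update follows; it assumes d is already 4-byte aligned when w >= 4, as the earlier alignment loops guarantee, and it is an illustration rather than the actual pixman patch.

    /* Hypothetical, self-contained rendering of the fill tail (mirrors the
     * original's type-punned stores). */
    #include <stdint.h>

    static void
    fill_tail (uint8_t *d, int w, uint32_t data)
    {
        while (w >= 4)
        {
            *(uint32_t *)d = data;
            w -= 4;
            d += 4;
        }

        if (w >= 2)
        {
            *(uint16_t *)d = data;
            w -= 2;
            d += 2;
        }

        if (w >= 1)
        {
            *(uint8_t *)d = data;
            /* No 'w -= 1' here: w is not read again in this iteration,
             * which is exactly what the analyzer reported above. */
        }
    }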
3760static void
3761sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
3762 pixman_op_t op,
3763 pixman_image_t * src_image,
3764 pixman_image_t * mask_image,
3765 pixman_image_t * dst_image,
3766 int32_t src_x,
3767 int32_t src_y,
3768 int32_t mask_x,
3769 int32_t mask_y,
3770 int32_t dest_x,
3771 int32_t dest_y,
3772 int32_t width,
3773 int32_t height)
3774{
3775 uint32_t src, srca;
3776 uint32_t *dst_line, *dst;
3777 uint8_t *mask_line, *mask;
3778 int dst_stride, mask_stride;
3779 int32_t w;
3780 uint32_t m;
3781
3782 __m128i xmm_src, xmm_def;
3783 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3784
3785 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
3786
3787 srca = src >> 24;
3788 if (src == 0)
3789 {
3790 pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride,
3791                          PIXMAN_FORMAT_BPP (dst_image->bits.format),
3792 dest_x, dest_y, width, height, 0);
3793 return;
3794 }
3795
3796    PIXMAN_IMAGE_GET_LINE (
3797        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3798    PIXMAN_IMAGE_GET_LINE (
3799        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3800
3801 xmm_def = create_mask_2x32_128 (src, src);
3802 xmm_src = expand_pixel_32_1x128 (src);
3803
3804 while (height--)
3805 {
3806 dst = dst_line;
3807 dst_line += dst_stride;
3808 mask = mask_line;
3809 mask_line += mask_stride;
3810 w = width;
3811
3812 while (w && (unsigned long)dst & 15)
3813 {
3814 uint8_t m = *mask++;
3815
3816 if (m)
3817 {
3818 *dst = pack_1x64_32 (
3819 pix_multiply_1x64 (
3820 _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
3821 }
3822 else
3823 {
3824 *dst = 0;
3825 }
3826
3827 w--;
3828 dst++;
3829 }
3830
3831 while (w >= 4)
3832 {
3833 m = *((uint32_t*)mask);
3834
3835 if (srca == 0xff && m == 0xffffffff)
3836 {
3837 save_128_aligned ((__m128i*)dst, xmm_def);
3838 }
3839 else if (m)
3840 {
3841 xmm_mask = unpack_32_1x128 (m);
3842 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3843
3844 /* Unpacking */
3845 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3846
3847 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3848 &xmm_mask_lo, &xmm_mask_hi);
3849
3850 pix_multiply_2x128 (&xmm_src, &xmm_src,
3851 &xmm_mask_lo, &xmm_mask_hi,
3852 &xmm_mask_lo, &xmm_mask_hi);
3853
3854 save_128_aligned (
3855 (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
3856 }
3857 else
3858 {
3859 save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
3860 }
3861
3862 w -= 4;
3863 dst += 4;
3864 mask += 4;
3865 }
3866
3867 while (w)
3868 {
3869 uint8_t m = *mask++;
3870
3871 if (m)
3872 {
3873 *dst = pack_1x64_32 (
3874 pix_multiply_1x64 (
3875 _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
3876 }
3877 else
3878 {
3879 *dst = 0;
3880 }
3881
3882 w--;
3883 dst++;
3884 }
3885 }
3886
3887 _mm_empty ();
3888}
3889
3890/*-----------------------------------------------------------------------
3891 * composite_over_n_8_0565
3892 */
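/* Solid source IN an a8 mask OVER an r5g6b5 destination.  The head and tail
 * loops blend one pixel at a time with the 1x64 helpers; the aligned body
 * converts 8 destination pixels per iteration, reading the mask as two
 * 4-byte groups and skipping the blend for groups that are all zero.
 */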
3893
3894static void
3895sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
3896 pixman_op_t op,
3897 pixman_image_t * src_image,
3898 pixman_image_t * mask_image,
3899 pixman_image_t * dst_image,
3900 int32_t src_x,
3901 int32_t src_y,
3902 int32_t mask_x,
3903 int32_t mask_y,
3904 int32_t dest_x,
3905 int32_t dest_y,
3906 int32_t width,
3907 int32_t height)
3908{
3909 uint32_t src, srca;
3910 uint16_t *dst_line, *dst, d;
3911 uint8_t *mask_line, *mask;
3912 int dst_stride, mask_stride;
3913 int32_t w;
3914 uint32_t m;
3915 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3916
3917 __m128i xmm_src, xmm_alpha;
3918 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3919 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3920
3921 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
3922
3923 srca = src >> 24;
3924 if (src == 0)
3925 return;
3926
3927 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t
); (dst_line) = ((uint16_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
3928 dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t
); (dst_line) = ((uint16_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
3929 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
3930 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
3931
3932 xmm_src = expand_pixel_32_1x128 (src);
3933 xmm_alpha = expand_alpha_1x128 (xmm_src);
3934 mmx_src = _mm_movepi64_pi64 (xmm_src);
3935 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
3936
3937 while (height--)
3938 {
3939 dst = dst_line;
3940 dst_line += dst_stride;
3941 mask = mask_line;
3942 mask_line += mask_stride;
3943 w = width;
3944
3945 while (w && (unsigned long)dst & 15)
3946 {
3947 m = *mask++;
3948
3949 if (m)
3950 {
3951 d = *dst;
3952 mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
3953 mmx_dest = expand565_16_1x64 (d);
3954
3955 *dst = pack_565_32_16 (
3956 pack_1x64_32 (
3957 in_over_1x64 (
3958 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3959 }
3960
3961 w--;
3962 dst++;
3963 }
3964
3965 while (w >= 8)
3966 {
3967 xmm_dst = load_128_aligned ((__m128i*) dst);
3968 unpack_565_128_4x128 (xmm_dst,
3969 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3970
3971 m = *((uint32_t*)mask);
3972 mask += 4;
3973
3974 if (m)
3975 {
3976 xmm_mask = unpack_32_1x128 (m);
3977 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3978
3979 /* Unpacking */
3980 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3981
3982 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3983 &xmm_mask_lo, &xmm_mask_hi);
3984
3985 in_over_2x128 (&xmm_src, &xmm_src,
3986 &xmm_alpha, &xmm_alpha,
3987 &xmm_mask_lo, &xmm_mask_hi,
3988 &xmm_dst0, &xmm_dst1);
3989 }
3990
3991 m = *((uint32_t*)mask);
3992 mask += 4;
3993
3994 if (m)
3995 {
3996 xmm_mask = unpack_32_1x128 (m);
3997 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3998
3999 /* Unpacking */
4000 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4001
4002 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
4003 &xmm_mask_lo, &xmm_mask_hi);
4004 in_over_2x128 (&xmm_src, &xmm_src,
4005 &xmm_alpha, &xmm_alpha,
4006 &xmm_mask_lo, &xmm_mask_hi,
4007 &xmm_dst2, &xmm_dst3);
4008 }
4009
4010 save_128_aligned (
4011 (__m128i*)dst, pack_565_4x128_128 (
4012 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4013
4014 w -= 8;
4015 dst += 8;
4016 }
4017
4018 while (w)
4019 {
4020 m = *mask++;
4021
4022 if (m)
4023 {
4024 d = *dst;
4025 mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
4026 mmx_dest = expand565_16_1x64 (d);
4027
4028 *dst = pack_565_32_16 (
4029 pack_1x64_32 (
4030 in_over_1x64 (
4031 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4032 }
4033
4034 w--;
4035 dst++;
4036 }
4037 }
4038
4039 _mm_empty ();
4040}
4041
4042/* -----------------------------------------------------------------------
4043 * composite_over_pixbuf_0565
4044 */
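/* Non-premultiplied source with reversed channel order ("pixbuf") composited
 * OVER an r5g6b5 destination.  The 8-pixel body works in two 4-pixel halves:
 * a fully opaque half only needs the channel swap (invert_colors_2x128), a
 * fully transparent half leaves the destination unchanged, and anything else
 * goes through over_rev_non_pre_2x128.
 */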
4045
4046static void
4047sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
4048 pixman_op_t op,
4049 pixman_image_t * src_image,
4050 pixman_image_t * mask_image,
4051 pixman_image_t * dst_image,
4052 int32_t src_x,
4053 int32_t src_y,
4054 int32_t mask_x,
4055 int32_t mask_y,
4056 int32_t dest_x,
4057 int32_t dest_y,
4058 int32_t width,
4059 int32_t height)
4060{
4061 uint16_t *dst_line, *dst, d;
4062 uint32_t *src_line, *src, s;
4063 int dst_stride, src_stride;
4064 int32_t w;
4065 uint32_t opaque, zero;
4066
4067 __m64 ms;
4068 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
4069 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
4070
4071 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t
); (dst_line) = ((uint16_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4072 dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t
); (dst_line) = ((uint16_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4073 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
4074 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
4075
4076#if 0
4077 /* FIXME
4078 *
4079 * This code was copied from the MMX implementation, FIXME included.
4080 * If it's a problem there, it's probably a problem here too.
4081 */
4082 assert (src_image->drawable == mask_image->drawable)((src_image->drawable == mask_image->drawable) ? (void)
(0) : __assert_fail ("src_image->drawable == mask_image->drawable"
, "pixman-sse2.c", 4082, __PRETTY_FUNCTION__))
;
4083#endif
4084
4085 while (height--)
4086 {
4087 dst = dst_line;
4088 dst_line += dst_stride;
4089 src = src_line;
4090 src_line += src_stride;
4091 w = width;
4092
4093 while (w && (unsigned long)dst & 15)
4094 {
4095 s = *src++;
4096 d = *dst;
4097
4098 ms = unpack_32_1x64 (s);
4099
4100 *dst++ = pack_565_32_16 (
4101 pack_1x64_32 (
4102 over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
4103 w--;
4104 }
4105
4106 while (w >= 8)
4107 {
4108 /* First round */
4109 xmm_src = load_128_unaligned ((__m128i*)src);
4110 xmm_dst = load_128_aligned ((__m128i*)dst);
4111
4112 opaque = is_opaque (xmm_src);
4113 zero = is_zero (xmm_src);
4114
4115 unpack_565_128_4x128 (xmm_dst,
4116 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
4117 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4118
4119 /* preload next round */
4120 xmm_src = load_128_unaligned ((__m128i*)(src + 4));
4121
4122 if (opaque)
4123 {
4124 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
4125 &xmm_dst0, &xmm_dst1);
4126 }
4127 else if (!zero)
4128 {
4129 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
4130 &xmm_dst0, &xmm_dst1);
4131 }
4132
4133 /* Second round */
4134 opaque = is_opaque (xmm_src);
4135 zero = is_zero (xmm_src);
4136
4137 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4138
4139 if (opaque)
4140 {
4141 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
4142 &xmm_dst2, &xmm_dst3);
4143 }
4144 else if (!zero)
4145 {
4146 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
4147 &xmm_dst2, &xmm_dst3);
4148 }
4149
4150 save_128_aligned (
4151 (__m128i*)dst, pack_565_4x128_128 (
4152 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4153
4154 w -= 8;
4155 src += 8;
4156 dst += 8;
4157 }
4158
4159 while (w)
4160 {
4161 s = *src++;
4162 d = *dst;
4163
4164 ms = unpack_32_1x64 (s);
4165
4166 *dst++ = pack_565_32_16 (
4167 pack_1x64_32 (
4168 over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
4169 w--;
4170 }
4171 }
4172
4173 _mm_empty ();
4174}
4175
4176/* -------------------------------------------------------------------------
4177 * composite_over_pixbuf_8888
4178 */
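/* Same pixbuf source as above, but OVER an a8r8g8b8 destination, 4 pixels
 * per iteration: opaque groups are channel-swapped and stored directly,
 * all-zero groups are skipped, and mixed groups load the destination and
 * blend with over_rev_non_pre_2x128.
 */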
4179
4180static void
4181sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
4182 pixman_op_t op,
4183 pixman_image_t * src_image,
4184 pixman_image_t * mask_image,
4185 pixman_image_t * dst_image,
4186 int32_t src_x,
4187 int32_t src_y,
4188 int32_t mask_x,
4189 int32_t mask_y,
4190 int32_t dest_x,
4191 int32_t dest_y,
4192 int32_t width,
4193 int32_t height)
4194{
4195 uint32_t *dst_line, *dst, d;
4196 uint32_t *src_line, *src, s;
4197 int dst_stride, src_stride;
4198 int32_t w;
4199 uint32_t opaque, zero;
4200
4201 __m128i xmm_src_lo, xmm_src_hi;
4202 __m128i xmm_dst_lo, xmm_dst_hi;
4203
4204 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4205 dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4206 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
4207 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
4208
4209#if 0
4210 /* FIXME
4211 *
4212 * This code was copied from the MMX implementation, FIXME included.
4213 * If it's a problem there, it's probably a problem here too.
4214 */
4215 assert (src_image->drawable == mask_image->drawable)((src_image->drawable == mask_image->drawable) ? (void)
(0) : __assert_fail ("src_image->drawable == mask_image->drawable"
, "pixman-sse2.c", 4215, __PRETTY_FUNCTION__))
;
4216#endif
4217
4218 while (height--)
4219 {
4220 dst = dst_line;
4221 dst_line += dst_stride;
4222 src = src_line;
4223 src_line += src_stride;
4224 w = width;
4225
4226 while (w && (unsigned long)dst & 15)
4227 {
4228 s = *src++;
4229 d = *dst;
4230
4231 *dst++ = pack_1x64_32 (
4232 over_rev_non_pre_1x64 (
4233 unpack_32_1x64 (s), unpack_32_1x64 (d)));
4234
4235 w--;
4236 }
4237
4238 while (w >= 4)
4239 {
4240 xmm_src_hi = load_128_unaligned ((__m128i*)src);
4241
4242 opaque = is_opaque (xmm_src_hi);
4243 zero = is_zero (xmm_src_hi);
4244
4245 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
4246
4247 if (opaque)
4248 {
4249 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
4250 &xmm_dst_lo, &xmm_dst_hi);
4251
4252 save_128_aligned (
4253 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4254 }
4255 else if (!zero)
4256 {
4257 xmm_dst_hi = load_128_aligned ((__m128i*)dst);
4258
4259 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
4260
4261 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
4262 &xmm_dst_lo, &xmm_dst_hi);
4263
4264 save_128_aligned (
4265 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4266 }
4267
4268 w -= 4;
4269 dst += 4;
4270 src += 4;
4271 }
4272
4273 while (w)
4274 {
4275 s = *src++;
4276 d = *dst;
4277
4278 *dst++ = pack_1x64_32 (
4279 over_rev_non_pre_1x64 (
4280 unpack_32_1x64 (s), unpack_32_1x64 (d)));
4281
4282 w--;
4283 }
4284 }
4285
4286 _mm_empty ();
4287}
4288
4289/* -------------------------------------------------------------------------------------------------
4290 * composite_over_n_8888_0565_ca
4291 */
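/* Solid source with a component-alpha (a8r8g8b8) mask OVER an r5g6b5
 * destination.  pack_cmp is the byte mask of _mm_cmpeq_epi32 against zero;
 * a value of 0xffff means the whole 4-pixel mask group is zero, so the
 * in_over step can be skipped and the destination passes through.
 */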
4292
4293static void
4294sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
4295 pixman_op_t op,
4296 pixman_image_t * src_image,
4297 pixman_image_t * mask_image,
4298 pixman_image_t * dst_image,
4299 int32_t src_x,
4300 int32_t src_y,
4301 int32_t mask_x,
4302 int32_t mask_y,
4303 int32_t dest_x,
4304 int32_t dest_y,
4305 int32_t width,
4306 int32_t height)
4307{
4308 uint32_t src;
4309 uint16_t *dst_line, *dst, d;
4310 uint32_t *mask_line, *mask, m;
4311 int dst_stride, mask_stride;
4312 int w;
4313 uint32_t pack_cmp;
4314
4315 __m128i xmm_src, xmm_alpha;
4316 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4317 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
4318
4319 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
4320
4321 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4322
4323 if (src == 0)
4324 return;
4325
4326 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t
); (dst_line) = ((uint16_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4327 dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint16_t
); (dst_line) = ((uint16_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4328 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
4329 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint32_t); (mask_line) = ((uint32_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
4330
4331 xmm_src = expand_pixel_32_1x128 (src);
4332 xmm_alpha = expand_alpha_1x128 (xmm_src);
4333 mmx_src = _mm_movepi64_pi64 (xmm_src);
4334 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
4335
4336 while (height--)
4337 {
4338 w = width;
4339 mask = mask_line;
4340 dst = dst_line;
4341 mask_line += mask_stride;
4342 dst_line += dst_stride;
4343
4344 while (w && ((unsigned long)dst & 15))
4345 {
4346 m = *(uint32_t *) mask;
4347
4348 if (m)
4349 {
4350 d = *dst;
4351 mmx_mask = unpack_32_1x64 (m);
4352 mmx_dest = expand565_16_1x64 (d);
4353
4354 *dst = pack_565_32_16 (
4355 pack_1x64_32 (
4356 in_over_1x64 (
4357 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4358 }
4359
4360 w--;
4361 dst++;
4362 mask++;
4363 }
4364
4365 while (w >= 8)
4366 {
4367 /* First round */
4368 xmm_mask = load_128_unaligned ((__m128i*)mask);
4369 xmm_dst = load_128_aligned ((__m128i*)dst);
4370
4371 pack_cmp = _mm_movemask_epi8 (
4372 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
4373
4374 unpack_565_128_4x128 (xmm_dst,
4375 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
4376 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4377
4378 /* preload next round */
4379 xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
4380
4381 /* blend the first 4 pixels unless the whole mask group is zero */
4382 if (pack_cmp != 0xffff)
4383 {
4384 in_over_2x128 (&xmm_src, &xmm_src,
4385 &xmm_alpha, &xmm_alpha,
4386 &xmm_mask_lo, &xmm_mask_hi,
4387 &xmm_dst0, &xmm_dst1);
4388 }
4389
4390 /* Second round */
4391 pack_cmp = _mm_movemask_epi8 (
4392 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
4393
4394 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4395
4396 if (pack_cmp != 0xffff)
4397 {
4398 in_over_2x128 (&xmm_src, &xmm_src,
4399 &xmm_alpha, &xmm_alpha,
4400 &xmm_mask_lo, &xmm_mask_hi,
4401 &xmm_dst2, &xmm_dst3);
4402 }
4403
4404 save_128_aligned (
4405 (__m128i*)dst, pack_565_4x128_128 (
4406 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4407
4408 w -= 8;
4409 dst += 8;
4410 mask += 8;
4411 }
4412
4413 while (w)
4414 {
4415 m = *(uint32_t *) mask;
4416
4417 if (m)
4418 {
4419 d = *dst;
4420 mmx_mask = unpack_32_1x64 (m);
4421 mmx_dest = expand565_16_1x64 (d);
4422
4423 *dst = pack_565_32_16 (
4424 pack_1x64_32 (
4425 in_over_1x64 (
4426 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4427 }
4428
4429 w--;
4430 dst++;
4431 mask++;
4432 }
4433 }
4434
4435 _mm_empty ();
4436}
4437
4438/* -----------------------------------------------------------------------
4439 * composite_in_n_8_8
4440 */
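/* IN operator with a solid source and an a8 mask on an a8 destination:
 * dest = dest * (mask * src_alpha).  The aligned body multiplies 16 bytes
 * per iteration with two pix_multiply_2x128 passes.
 */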
4441
4442static void
4443sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
4444 pixman_op_t op,
4445 pixman_image_t * src_image,
4446 pixman_image_t * mask_image,
4447 pixman_image_t * dst_image,
4448 int32_t src_x,
4449 int32_t src_y,
4450 int32_t mask_x,
4451 int32_t mask_y,
4452 int32_t dest_x,
4453 int32_t dest_y,
4454 int32_t width,
4455 int32_t height)
4456{
4457 uint8_t *dst_line, *dst;
4458 uint8_t *mask_line, *mask;
4459 int dst_stride, mask_stride;
4460 uint32_t d, m;
4461 uint32_t src;
4462 uint8_t sa;
4463 int32_t w;
4464
4465 __m128i xmm_alpha;
4466 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4467 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4468
4469 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4470 dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4471 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
4472 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
4473
4474 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4475
4476 sa = src >> 24;
4477
4478 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4479
4480 while (height--)
4481 {
4482 dst = dst_line;
4483 dst_line += dst_stride;
4484 mask = mask_line;
4485 mask_line += mask_stride;
4486 w = width;
4487
4488 while (w && ((unsigned long)dst & 15))
4489 {
4490 m = (uint32_t) *mask++;
4491 d = (uint32_t) *dst;
4492
4493 *dst++ = (uint8_t) pack_1x64_32 (
4494 pix_multiply_1x64 (
4495 pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha),
4496 unpack_32_1x64 (m)),
4497 unpack_32_1x64 (d)));
4498 w--;
4499 }
4500
4501 while (w >= 16)
4502 {
4503 xmm_mask = load_128_unaligned ((__m128i*)mask);
4504 xmm_dst = load_128_aligned ((__m128i*)dst);
4505
4506 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4507 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4508
4509 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4510 &xmm_mask_lo, &xmm_mask_hi,
4511 &xmm_mask_lo, &xmm_mask_hi);
4512
4513 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
4514 &xmm_dst_lo, &xmm_dst_hi,
4515 &xmm_dst_lo, &xmm_dst_hi);
4516
4517 save_128_aligned (
4518 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4519
4520 mask += 16;
4521 dst += 16;
4522 w -= 16;
4523 }
4524
4525 while (w)
4526 {
4527 m = (uint32_t) *mask++;
4528 d = (uint32_t) *dst;
4529
4530 *dst++ = (uint8_t) pack_1x64_32 (
4531 pix_multiply_1x64 (
4532 pix_multiply_1x64 (
4533 _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
4534 unpack_32_1x64 (d)));
4535 w--;
4536 }
4537 }
4538
4539 _mm_empty ();
4540}
4541
4542/* -----------------------------------------------------------------------
4543 * composite_in_n_8
4544 */
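/* IN operator with a solid source and no mask on an a8 destination:
 * dest = dest * src_alpha.  A source alpha of 0xff is a no-op and returns
 * early; 0x00 degenerates to a fill with zero.
 */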
4545
4546static void
4547sse2_composite_in_n_8 (pixman_implementation_t *imp,
4548 pixman_op_t op,
4549 pixman_image_t * src_image,
4550 pixman_image_t * mask_image,
4551 pixman_image_t * dst_image,
4552 int32_t src_x,
4553 int32_t src_y,
4554 int32_t mask_x,
4555 int32_t mask_y,
4556 int32_t dest_x,
4557 int32_t dest_y,
4558 int32_t width,
4559 int32_t height)
4560{
4561 uint8_t *dst_line, *dst;
4562 int dst_stride;
4563 uint32_t d;
4564 uint32_t src;
4565 int32_t w;
4566
4567 __m128i xmm_alpha;
4568 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4569
4570 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4571 dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4572
4573 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4574
4575 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4576
4577 src = src >> 24;
4578
4579 if (src == 0xff)
4580 return;
4581
4582 if (src == 0x00)
4583 {
4584 pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
4585 8, dest_x, dest_y, width, height, src);
4586
4587 return;
4588 }
4589
4590 while (height--)
4591 {
4592 dst = dst_line;
4593 dst_line += dst_stride;
4594 w = width;
4595
4596 while (w && ((unsigned long)dst & 15))
4597 {
4598 d = (uint32_t) *dst;
4599
4600 *dst++ = (uint8_t) pack_1x64_32 (
4601 pix_multiply_1x64 (
4602 _mm_movepi64_pi64 (xmm_alpha),
4603 unpack_32_1x64 (d)));
4604 w--;
4605 }
4606
4607 while (w >= 16)
4608 {
4609 xmm_dst = load_128_aligned ((__m128i*)dst);
4610
4611 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4612
4613 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4614 &xmm_dst_lo, &xmm_dst_hi,
4615 &xmm_dst_lo, &xmm_dst_hi);
4616
4617 save_128_aligned (
4618 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4619
4620 dst += 16;
4621 w -= 16;
4622 }
4623
4624 while (w)
4625 {
4626 d = (uint32_t) *dst;
4627
4628 *dst++ = (uint8_t) pack_1x64_32 (
4629 pix_multiply_1x64 (
4630 _mm_movepi64_pi64 (xmm_alpha),
4631 unpack_32_1x64 (d)));
4632 w--;
4633 }
4634 }
4635
4636 _mm_empty ();
4637}
4638
4639/* ---------------------------------------------------------------------------
4640 * composite_in_8_8
4641 */
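/* IN operator between two a8 images: dest = dest * src, one byte per pixel,
 * 16 pixels per iteration in the aligned body.
 */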
4642
4643static void
4644sse2_composite_in_8_8 (pixman_implementation_t *imp,
4645 pixman_op_t op,
4646 pixman_image_t * src_image,
4647 pixman_image_t * mask_image,
4648 pixman_image_t * dst_image,
4649 int32_t src_x,
4650 int32_t src_y,
4651 int32_t mask_x,
4652 int32_t mask_y,
4653 int32_t dest_x,
4654 int32_t dest_y,
4655 int32_t width,
4656 int32_t height)
4657{
4658 uint8_t *dst_line, *dst;
4659 uint8_t *src_line, *src;
4660 int src_stride, dst_stride;
4661 int32_t w;
4662 uint32_t s, d;
4663
4664 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
4665 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4666
4667 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4668 dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4669 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
4670 src_image, src_x, src_y, uint8_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
4671
4672 while (height--)
4673 {
4674 dst = dst_line;
4675 dst_line += dst_stride;
4676 src = src_line;
4677 src_line += src_stride;
4678 w = width;
4679
4680 while (w && ((unsigned long)dst & 15))
4681 {
4682 s = (uint32_t) *src++;
4683 d = (uint32_t) *dst;
4684
4685 *dst++ = (uint8_t) pack_1x64_32 (
4686 pix_multiply_1x64 (
4687 unpack_32_1x64 (s), unpack_32_1x64 (d)));
4688 w--;
4689 }
4690
4691 while (w >= 16)
4692 {
4693 xmm_src = load_128_unaligned ((__m128i*)src);
4694 xmm_dst = load_128_aligned ((__m128i*)dst);
4695
4696 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4697 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4698
4699 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
4700 &xmm_dst_lo, &xmm_dst_hi,
4701 &xmm_dst_lo, &xmm_dst_hi);
4702
4703 save_128_aligned (
4704 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4705
4706 src += 16;
4707 dst += 16;
4708 w -= 16;
4709 }
4710
4711 while (w)
4712 {
4713 s = (uint32_t) *src++;
4714 d = (uint32_t) *dst;
4715
4716 *dst++ = (uint8_t) pack_1x64_32 (
4717 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d)));
4718 w--;
4719 }
4720 }
4721
4722 _mm_empty ();
4723}
4724
4725/* -------------------------------------------------------------------------
4726 * composite_add_n_8_8
4727 */
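/* ADD operator with a solid source and an a8 mask on an a8 destination:
 * dest = clamp (dest + mask * src_alpha), using saturating unsigned adds.
 */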
4728
4729static void
4730sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
4731 pixman_op_t op,
4732 pixman_image_t * src_image,
4733 pixman_image_t * mask_image,
4734 pixman_image_t * dst_image,
4735 int32_t src_x,
4736 int32_t src_y,
4737 int32_t mask_x,
4738 int32_t mask_y,
4739 int32_t dest_x,
4740 int32_t dest_y,
4741 int32_t width,
4742 int32_t height)
4743{
4744 uint8_t *dst_line, *dst;
4745 uint8_t *mask_line, *mask;
4746 int dst_stride, mask_stride;
4747 int32_t w;
4748 uint32_t src;
4749 uint8_t sa;
4750 uint32_t m, d;
4751
4752 __m128i xmm_alpha;
4753 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4754 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4755
4756 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4757 dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4758 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
4759 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
4760
4761 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4762
4763 sa = src >> 24;
4764
4765 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4766
4767 while (height--)
4768 {
4769 dst = dst_line;
4770 dst_line += dst_stride;
4771 mask = mask_line;
4772 mask_line += mask_stride;
4773 w = width;
4774
4775 while (w && ((unsigned long)dst & 15))
4776 {
4777 m = (uint32_t) *mask++;
4778 d = (uint32_t) *dst;
4779
4780 *dst++ = (uint8_t) pack_1x64_32 (
4781 _mm_adds_pu16 (
4782 pix_multiply_1x64 (
4783 _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
4784 unpack_32_1x64 (d)));
4785 w--;
4786 }
4787
4788 while (w >= 16)
4789 {
4790 xmm_mask = load_128_unaligned ((__m128i*)mask);
4791 xmm_dst = load_128_aligned ((__m128i*)dst);
4792
4793 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4794 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4795
4796 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4797 &xmm_mask_lo, &xmm_mask_hi,
4798 &xmm_mask_lo, &xmm_mask_hi);
4799
4800 xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
4801 xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
4802
4803 save_128_aligned (
4804 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4805
4806 mask += 16;
4807 dst += 16;
4808 w -= 16;
4809 }
4810
4811 while (w)
4812 {
4813 m = (uint32_t) *mask++;
4814 d = (uint32_t) *dst;
4815
4816 *dst++ = (uint8_t) pack_1x64_32 (
4817 _mm_adds_pu16 (
4818 pix_multiply_1x64 (
4819 _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
4820 unpack_32_1x64 (d)));
4821
4822 w--;
4823 }
4824 }
4825
4826 _mm_empty ();
4827}
4828
4829/* -------------------------------------------------------------------------
4830 * composite_add_n_8
4831 */
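/* ADD operator with a solid source on an a8 destination:
 * dest = clamp (dest + src_alpha).  A zero alpha returns early and 0xff
 * degenerates to a fill with 0xff; otherwise the alpha byte is replicated
 * across a full SSE register and added with _mm_adds_epu8.
 */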
4832
4833static void
4834sse2_composite_add_n_8 (pixman_implementation_t *imp,
4835 pixman_op_t op,
4836 pixman_image_t * src_image,
4837 pixman_image_t * mask_image,
4838 pixman_image_t * dst_image,
4839 int32_t src_x,
4840 int32_t src_y,
4841 int32_t mask_x,
4842 int32_t mask_y,
4843 int32_t dest_x,
4844 int32_t dest_y,
4845 int32_t width,
4846 int32_t height)
4847{
4848 uint8_t *dst_line, *dst;
4849 int dst_stride;
4850 int32_t w;
4851 uint32_t src;
4852
4853 __m128i xmm_src;
4854
4855 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4856 dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4857
4858 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4859
4860 src >>= 24;
4861
4862 if (src == 0x00)
4863 return;
4864
4865 if (src == 0xff)
4866 {
4867 pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
4868 8, dest_x, dest_y, width, height, 0xff);
4869
4870 return;
4871 }
4872
4873 src = (src << 24) | (src << 16) | (src << 8) | src;
4874 xmm_src = _mm_set_epi32 (src, src, src, src);
4875
4876 while (height--)
4877 {
4878 dst = dst_line;
4879 dst_line += dst_stride;
4880 w = width;
4881
4882 while (w && ((unsigned long)dst & 15))
4883 {
4884 *dst = (uint8_t)_mm_cvtsi64_si32 (
4885 _mm_adds_pu8 (
4886 _mm_movepi64_pi64 (xmm_src),
4887 _mm_cvtsi32_si64 (*dst)));
4888
4889 w--;
4890 dst++;
4891 }
4892
4893 while (w >= 16)
4894 {
4895 save_128_aligned (
4896 (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
4897
4898 dst += 16;
4899 w -= 16;
4900 }
4901
4902 while (w)
4903 {
4904 *dst = (uint8_t)_mm_cvtsi64_si32 (
4905 _mm_adds_pu8 (
4906 _mm_movepi64_pi64 (xmm_src),
4907 _mm_cvtsi32_si64 (*dst)));
4908
4909 w--;
4910 dst++;
4911 }
4912 }
4913
4914 _mm_empty ();
4915}
4916
4917/* ----------------------------------------------------------------------
4918 * composite_add_8_8
4919 */
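/* ADD operator between two a8 images.  The scalar head and tail saturate
 * with t | (0 - (t >> 8)); the 4-byte-aligned middle is handed to
 * core_combine_add_u_sse2, which treats each group of 4 bytes as one
 * uint32_t (hence w >> 2).
 */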
4920
4921static void
4922sse2_composite_add_8_8 (pixman_implementation_t *imp,
4923 pixman_op_t op,
4924 pixman_image_t * src_image,
4925 pixman_image_t * mask_image,
4926 pixman_image_t * dst_image,
4927 int32_t src_x,
4928 int32_t src_y,
4929 int32_t mask_x,
4930 int32_t mask_y,
4931 int32_t dest_x,
4932 int32_t dest_y,
4933 int32_t width,
4934 int32_t height)
4935{
4936 uint8_t *dst_line, *dst;
4937 uint8_t *src_line, *src;
4938 int dst_stride, src_stride;
4939 int32_t w;
4940 uint16_t t;
4941
4942 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
4943 src_image, src_x, src_y, uint8_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (src_line) = ((uint8_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
4944 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
4945 dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint8_t
); (dst_line) = ((uint8_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
4946
4947 while (height--)
4948 {
4949 dst = dst_line;
4950 src = src_line;
4951
4952 dst_line += dst_stride;
4953 src_line += src_stride;
4954 w = width;
4955
4956 /* Small head */
4957 while (w && (unsigned long)dst & 3)
4958 {
4959 t = (*dst) + (*src++);
4960 *dst++ = t | (0 - (t >> 8));
4961 w--;
4962 }
4963
4964 core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL((void*)0), w >> 2);
4965
4966 /* Small tail */
4967 dst += w & 0xfffc;
4968 src += w & 0xfffc;
4969
4970 w &= 3;
4971
4972 while (w)
4973 {
4974 t = (*dst) + (*src++);
4975 *dst++ = t | (0 - (t >> 8));
4976 w--;
4977 }
4978 }
4979
4980 _mm_empty ();
4981}
4982
4983/* ---------------------------------------------------------------------
4984 * composite_add_8888_8888
4985 */
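/* ADD operator between two a8r8g8b8 images: each row is handed whole to
 * core_combine_add_u_sse2 for a saturating per-byte add.
 */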
4986static void
4987sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
4988 pixman_op_t op,
4989 pixman_image_t * src_image,
4990 pixman_image_t * mask_image,
4991 pixman_image_t * dst_image,
4992 int32_t src_x,
4993 int32_t src_y,
4994 int32_t mask_x,
4995 int32_t mask_y,
4996 int32_t dest_x,
4997 int32_t dest_y,
4998 int32_t width,
4999 int32_t height)
5000{
5001 uint32_t *dst_line, *dst;
5002 uint32_t *src_line, *src;
5003 int dst_stride, src_stride;
5004
5005 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
5006 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
5007 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
5008 dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
5009
5010 while (height--)
5011 {
5012 dst = dst_line;
5013 dst_line += dst_stride;
5014 src = src_line;
5015 src_line += src_stride;
5016
5017 core_combine_add_u_sse2 (dst, src, NULL((void*)0), width);
5018 }
5019
5020 _mm_empty ();
5021}
5022
5023/* -------------------------------------------------------------------------------------------------
5024 * sse2_composite_copy_area
5025 */
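/* Plain copy.  pixman_blt_sse2 only handles matching 16- or 32-bpp formats;
 * strides are converted from uint32_t units to bytes, the destination is
 * aligned to 16 bytes, and the bulk is copied in 64- and 16-byte chunks
 * using unaligned loads and aligned stores.
 */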
5026
5027static pixman_bool_t
5028pixman_blt_sse2 (uint32_t *src_bits,
5029 uint32_t *dst_bits,
5030 int src_stride,
5031 int dst_stride,
5032 int src_bpp,
5033 int dst_bpp,
5034 int src_x,
5035 int src_y,
5036 int dst_x,
5037 int dst_y,
5038 int width,
5039 int height)
5040{
5041 uint8_t * src_bytes;
5042 uint8_t * dst_bytes;
5043 int byte_width;
5044
5045 if (src_bpp != dst_bpp)
5046 return FALSE0;
5047
5048 if (src_bpp == 16)
5049 {
5050 src_stride = src_stride * (int) sizeof (uint32_t) / 2;
5051 dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
5052 src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
5053 dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
5054 byte_width = 2 * width;
5055 src_stride *= 2;
5056 dst_stride *= 2;
5057 }
5058 else if (src_bpp == 32)
5059 {
5060 src_stride = src_stride * (int) sizeof (uint32_t) / 4;
5061 dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
5062 src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
5063 dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
5064 byte_width = 4 * width;
5065 src_stride *= 4;
5066 dst_stride *= 4;
5067 }
5068 else
5069 {
5070 return FALSE0;
5071 }
5072
5073 while (height--)
5074 {
5075 int w;
5076 uint8_t *s = src_bytes;
5077 uint8_t *d = dst_bytes;
5078 src_bytes += src_stride;
5079 dst_bytes += dst_stride;
5080 w = byte_width;
5081
5082 while (w >= 2 && ((unsigned long)d & 3))
5083 {
5084 *(uint16_t *)d = *(uint16_t *)s;
5085 w -= 2;
5086 s += 2;
5087 d += 2;
5088 }
5089
5090 while (w >= 4 && ((unsigned long)d & 15))
5091 {
5092 *(uint32_t *)d = *(uint32_t *)s;
5093
5094 w -= 4;
5095 s += 4;
5096 d += 4;
5097 }
5098
5099 while (w >= 64)
5100 {
5101 __m128i xmm0, xmm1, xmm2, xmm3;
5102
5103 xmm0 = load_128_unaligned ((__m128i*)(s));
5104 xmm1 = load_128_unaligned ((__m128i*)(s + 16));
5105 xmm2 = load_128_unaligned ((__m128i*)(s + 32));
5106 xmm3 = load_128_unaligned ((__m128i*)(s + 48));
5107
5108 save_128_aligned ((__m128i*)(d), xmm0);
5109 save_128_aligned ((__m128i*)(d + 16), xmm1);
5110 save_128_aligned ((__m128i*)(d + 32), xmm2);
5111 save_128_aligned ((__m128i*)(d + 48), xmm3);
5112
5113 s += 64;
5114 d += 64;
5115 w -= 64;
5116 }
5117
5118 while (w >= 16)
5119 {
5120 save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
5121
5122 w -= 16;
5123 d += 16;
5124 s += 16;
5125 }
5126
5127 while (w >= 4)
5128 {
5129 *(uint32_t *)d = *(uint32_t *)s;
5130
5131 w -= 4;
5132 s += 4;
5133 d += 4;
5134 }
5135
5136 if (w >= 2)
5137 {
5138 *(uint16_t *)d = *(uint16_t *)s;
5139 w -= 2;
5140 s += 2;
5141 d += 2;
5142 }
5143 }
5144
5145 _mm_empty ();
5146
5147 return TRUE1;
5148}
5149
5150static void
5151sse2_composite_copy_area (pixman_implementation_t *imp,
5152 pixman_op_t op,
5153 pixman_image_t * src_image,
5154 pixman_image_t * mask_image,
5155 pixman_image_t * dst_image,
5156 int32_t src_x,
5157 int32_t src_y,
5158 int32_t mask_x,
5159 int32_t mask_y,
5160 int32_t dest_x,
5161 int32_t dest_y,
5162 int32_t width,
5163 int32_t height)
5164{
5165 pixman_blt_sse2 (src_image->bits.bits,
5166 dst_image->bits.bits,
5167 src_image->bits.rowstride,
5168 dst_image->bits.rowstride,
5169 PIXMAN_FORMAT_BPP (src_image->bits.format)(((src_image->bits.format) >> 24) ),
5170 PIXMAN_FORMAT_BPP (dst_image->bits.format)(((dst_image->bits.format) >> 24) ),
5171 src_x, src_y, dest_x, dest_y, width, height);
5172}
5173
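/* x8r8g8b8 source (alpha forced to 0xff) with an a8 mask OVER an a8r8g8b8
 * destination.  A mask group of 0xffffffff stores the opaque source
 * directly; otherwise the mask bytes are expanded to component alpha and
 * blended with in_over against mask_00ff as the (opaque) source alpha.
 */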
5174static void
5175sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
5176 pixman_op_t op,
5177 pixman_image_t * src_image,
5178 pixman_image_t * mask_image,
5179 pixman_image_t * dst_image,
5180 int32_t src_x,
5181 int32_t src_y,
5182 int32_t mask_x,
5183 int32_t mask_y,
5184 int32_t dest_x,
5185 int32_t dest_y,
5186 int32_t width,
5187 int32_t height)
5188{
5189 uint32_t *src, *src_line, s;
5190 uint32_t *dst, *dst_line, d;
5191 uint8_t *mask, *mask_line;
5192 uint32_t m;
5193 int src_stride, mask_stride, dst_stride;
5194 int32_t w;
5195 __m64 ms;
5196
5197 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
5198 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5199 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5200
5201 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
5202 dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
5203 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
5204 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
5205 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
5206 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
5207
5208 while (height--)
5209 {
5210 src = src_line;
5211 src_line += src_stride;
5212 dst = dst_line;
5213 dst_line += dst_stride;
5214 mask = mask_line;
5215 mask_line += mask_stride;
5216
5217 w = width;
5218
5219 while (w && (unsigned long)dst & 15)
5220 {
5221 s = 0xff000000 | *src++;
5222 m = (uint32_t) *mask++;
5223 d = *dst;
5224 ms = unpack_32_1x64 (s);
5225
5226 if (m != 0xff)
5227 {
5228 __m64 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
5229 __m64 md = unpack_32_1x64 (d);
5230
5231 ms = in_over_1x64 (&ms, &mask_x00ff, &ma, &md);
5232 }
5233
5234 *dst++ = pack_1x64_32 (ms);
5235 w--;
5236 }
5237
5238 while (w >= 4)
5239 {
5240 m = *(uint32_t*) mask;
5241 xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
5242
5243 if (m == 0xffffffff)
5244 {
5245 save_128_aligned ((__m128i*)dst, xmm_src);
5246 }
5247 else
5248 {
5249 xmm_dst = load_128_aligned ((__m128i*)dst);
5250
5251 xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
5252
5253 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5254 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5255 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5256
5257 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5258
5259 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5260
5261 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5262 }
5263
5264 src += 4;
5265 dst += 4;
5266 mask += 4;
5267 w -= 4;
5268 }
5269
5270 while (w)
5271 {
5272 m = (uint32_t) *mask++;
5273
5274 if (m)
5275 {
5276 s = 0xff000000 | *src;
5277
5278 if (m == 0xff)
5279 {
5280 *dst = s;
5281 }
5282 else
5283 {
5284 __m64 ma, md, ms;
5285
5286 d = *dst;
5287
5288 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
5289 md = unpack_32_1x64 (d);
5290 ms = unpack_32_1x64 (s);
5291
5292 *dst = pack_1x64_32 (in_over_1x64 (&ms, &mask_x00ff, &ma, &md));
5293 }
5294
5295 }
5296
5297 src++;
5298 dst++;
5299 w--;
5300 }
5301 }
5302
5303 _mm_empty ();
5304}
5305
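/* a8r8g8b8 source with an a8 mask OVER an a8r8g8b8 destination.  Unlike the
 * x888 variant above, the source alpha is real, so the 4-pixel fast path
 * requires both m == 0xffffffff and is_opaque (xmm_src); otherwise source
 * alpha and mask are both expanded and fed to in_over_2x128.
 */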
5306static void
5307sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
5308 pixman_op_t op,
5309 pixman_image_t * src_image,
5310 pixman_image_t * mask_image,
5311 pixman_image_t * dst_image,
5312 int32_t src_x,
5313 int32_t src_y,
5314 int32_t mask_x,
5315 int32_t mask_y,
5316 int32_t dest_x,
5317 int32_t dest_y,
5318 int32_t width,
5319 int32_t height)
5320{
5321 uint32_t *src, *src_line, s;
5322 uint32_t *dst, *dst_line, d;
5323 uint8_t *mask, *mask_line;
5324 uint32_t m;
5325 int src_stride, mask_stride, dst_stride;
5326 int32_t w;
5327
5328 __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
5329 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5330 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5331
5332 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
5333 dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
5334 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
5335 mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = mask_image
->bits.bits; __stride__ = mask_image->bits.rowstride; (
mask_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof
(uint8_t); (mask_line) = ((uint8_t *) __bits__) + (mask_stride
) * (mask_y) + (1) * (mask_x); } while (0)
;
5336 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
5337 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = src_image
->bits.bits; __stride__ = src_image->bits.rowstride; (src_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (src_line) = ((uint32_t *) __bits__) + (src_stride) * (src_y
) + (1) * (src_x); } while (0)
;
5338
5339 while (height--)
5340 {
5341 src = src_line;
5342 src_line += src_stride;
5343 dst = dst_line;
5344 dst_line += dst_stride;
5345 mask = mask_line;
5346 mask_line += mask_stride;
5347
5348 w = width;
5349
5350 while (w && (unsigned long)dst & 15)
5351 {
5352 uint32_t sa;
5353
5354 s = *src++;
5355 m = (uint32_t) *mask++;
5356 d = *dst;
5357
5358 sa = s >> 24;
5359
5360 if (m)
5361 {
5362 if (sa == 0xff && m == 0xff)
5363 {
5364 *dst = s;
5365 }
5366 else
5367 {
5368 __m64 ms, md, ma, msa;
5369
5370 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5371 ms = unpack_32_1x64 (s);
5372 md = unpack_32_1x64 (d);
5373
5374 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5375
5376 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5377 }
5378 }
5379
5380 dst++;
5381 w--;
5382 }
5383
5384 while (w >= 4)
5385 {
5386 m = *(uint32_t *) mask;
5387
5388 if (m)
5389 {
5390 xmm_src = load_128_unaligned ((__m128i*)src);
5391
5392 if (m == 0xffffffff && is_opaque (xmm_src))
5393 {
5394 save_128_aligned ((__m128i *)dst, xmm_src);
5395 }
5396 else
5397 {
5398 xmm_dst = load_128_aligned ((__m128i *)dst);
5399
5400 xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
5401
5402 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5403 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5404 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5405
5406 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
5407 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5408
5409 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
5410 &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5411
5412 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5413 }
5414 }
5415
5416 src += 4;
5417 dst += 4;
5418 mask += 4;
5419 w -= 4;
5420 }
5421
5422 while (w)
5423 {
5424 uint32_t sa;
5425
5426 s = *src++;
5427 m = (uint32_t) *mask++;
5428 d = *dst;
5429
5430 sa = s >> 24;
5431
5432 if (m)
5433 {
5434 if (sa == 0xff && m == 0xff)
5435 {
5436 *dst = s;
5437 }
5438 else
5439 {
5440 __m64 ms, md, ma, msa;
5441
5442 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5443 ms = unpack_32_1x64 (s);
5444 md = unpack_32_1x64 (d);
5445
5446 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5447
5448 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5449 }
5450 }
5451
5452 dst++;
5453 w--;
5454 }
5455 }
5456
5457 _mm_empty ();
5458}
5459
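/* OVER_REVERSE with a solid source: the destination stays on top and the
 * solid color only shows through where the destination alpha is less than
 * 0xff, i.e. over (dest, dest_alpha, src) per 4-pixel group.
 */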
5460static void
5461sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
5462 pixman_op_t op,
5463 pixman_image_t * src_image,
5464 pixman_image_t * mask_image,
5465 pixman_image_t * dst_image,
5466 int32_t src_x,
5467 int32_t src_y,
5468 int32_t mask_x,
5469 int32_t mask_y,
5470 int32_t dest_x,
5471 int32_t dest_y,
5472 int32_t width,
5473 int32_t height)
5474{
5475 uint32_t src;
5476 uint32_t *dst_line, *dst;
5477 __m128i xmm_src;
5478 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5479 __m128i xmm_dsta_hi, xmm_dsta_lo;
5480 int dst_stride;
5481 int32_t w;
5482
5483 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
5484
5485 if (src == 0)
5486 return;
5487
5488 PIXMAN_IMAGE_GET_LINE (do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
5489 dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1)do { uint32_t *__bits__; int __stride__; __bits__ = dst_image
->bits.bits; __stride__ = dst_image->bits.rowstride; (dst_stride
) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (uint32_t
); (dst_line) = ((uint32_t *) __bits__) + (dst_stride) * (dest_y
) + (1) * (dest_x); } while (0)
;
5490
5491 xmm_src = expand_pixel_32_1x128 (src);
5492
5493 while (height--)
5494 {
5495 dst = dst_line;
5496
5497 dst_line += dst_stride;
5498 w = width;
5499
5500 while (w && (unsigned long)dst & 15)
5501 {
5502 __m64 vd;
5503
5504 vd = unpack_32_1x64 (*dst);
5505
5506 *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
5507 _mm_movepi64_pi64 (xmm_src)));
5508 w--;
5509 dst++;
5510 }
5511
5512 while (w >= 4)
5513 {
5514 __m128i tmp_lo, tmp_hi;
5515
5516 xmm_dst = load_128_aligned ((__m128i*)dst);
5517
5518 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5519 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
5520
5521 tmp_lo = xmm_src;
5522 tmp_hi = xmm_src;
5523
5524 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
5525 &xmm_dsta_lo, &xmm_dsta_hi,
5526 &tmp_lo, &tmp_hi);
5527
5528 save_128_aligned (
5529 (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
5530
5531 w -= 4;
5532 dst += 4;
5533 }
5534
5535 while (w)
5536 {
5537 __m64 vd;
5538
5539 vd = unpack_32_1x64 (*dst);
5540
5541 *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
5542 _mm_movepi64_pi64 (xmm_src)));
5543 w--;
5544 dst++;
5545 }
5546
5547 }
5548
5549 _mm_empty ();
5550}
5551
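/* OVER with an a8r8g8b8 source and a per-pixel a8r8g8b8 mask; only the
 * alpha channel of the mask (m >> 24 in the scalar paths below) is used
 * to attenuate the source before the OVER step.
 */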
5552static void
5553sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
5554 pixman_op_t op,
5555 pixman_image_t * src_image,
5556 pixman_image_t * mask_image,
5557 pixman_image_t * dst_image,
5558 int32_t src_x,
5559 int32_t src_y,
5560 int32_t mask_x,
5561 int32_t mask_y,
5562 int32_t dest_x,
5563 int32_t dest_y,
5564 int32_t width,
5565 int32_t height)
5566{
5567 uint32_t *src, *src_line, s;
5568 uint32_t *dst, *dst_line, d;
5569 uint32_t *mask, *mask_line;
5570 uint32_t m;
5571 int src_stride, mask_stride, dst_stride;
5572 int32_t w;
5573
5574 __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
5575 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5576 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5577
5578    PIXMAN_IMAGE_GET_LINE (
5579	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
5580    PIXMAN_IMAGE_GET_LINE (
5581	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
5582    PIXMAN_IMAGE_GET_LINE (
5583	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
5584
5585 while (height--)
5586 {
5587 src = src_line;
5588 src_line += src_stride;
5589 dst = dst_line;
5590 dst_line += dst_stride;
5591 mask = mask_line;
5592 mask_line += mask_stride;
5593
5594 w = width;
5595
5596 while (w && (unsigned long)dst & 15)
5597 {
5598 uint32_t sa;
5599
5600 s = *src++;
5601 m = (*mask++) >> 24;
5602 d = *dst;
5603
5604 sa = s >> 24;
5605
5606 if (m)
5607 {
5608 if (sa == 0xff && m == 0xff)
5609 {
5610 *dst = s;
5611 }
5612 else
5613 {
5614 __m64 ms, md, ma, msa;
5615
5616 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5617 ms = unpack_32_1x64 (s);
5618 md = unpack_32_1x64 (d);
5619
5620 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5621
5622 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5623 }
5624 }
5625
5626 dst++;
5627 w--;
5628 }
5629
5630 while (w >= 4)
5631 {
5632 xmm_mask = load_128_unaligned ((__m128i*)mask);
5633
5634 if (!is_transparent (xmm_mask))
5635 {
5636 xmm_src = load_128_unaligned ((__m128i*)src);
5637
5638 if (is_opaque (xmm_mask) && is_opaque (xmm_src))
5639 {
5640 save_128_aligned ((__m128i *)dst, xmm_src);
5641 }
5642 else
5643 {
5644 xmm_dst = load_128_aligned ((__m128i *)dst);
5645
5646 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5647 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5648 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5649
5650 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
5651 expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5652
5653 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
5654 &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5655
5656 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5657 }
5658 }
5659
5660 src += 4;
5661 dst += 4;
5662 mask += 4;
5663 w -= 4;
5664 }
5665
5666 while (w)
5667 {
5668 uint32_t sa;
5669
5670 s = *src++;
5671 m = (*mask++) >> 24;
5672 d = *dst;
5673
5674 sa = s >> 24;
5675
5676 if (m)
5677 {
5678 if (sa == 0xff && m == 0xff)
5679 {
5680 *dst = s;
5681 }
5682 else
5683 {
5684 __m64 ms, md, ma, msa;
5685
5686 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5687 ms = unpack_32_1x64 (s);
5688 md = unpack_32_1x64 (d);
5689
5690 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5691
5692 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5693 }
5694 }
5695
5696 dst++;
5697 w--;
5698 }
5699 }
5700
5701 _mm_empty ();
5702}
5703
5704/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
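/* Walks the source with a 16.16 fixed-point coordinate: the nearest source
 * pixel is ps[vx >> 16] and vx advances by unit_x per output pixel; pm stays
 * NULL here, so combine1/combine4 simply fetch the source values.
 */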
5705static force_inline void
5706scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
5707 const uint32_t* ps,
5708 int32_t w,
5709 pixman_fixed_t vx,
5710 pixman_fixed_t unit_x,
5711 pixman_fixed_t max_vx)
5712{
5713 uint32_t s, d;
5714    const uint32_t* pm = NULL;
5715
5716 __m128i xmm_dst_lo, xmm_dst_hi;
5717 __m128i xmm_src_lo, xmm_src_hi;
5718 __m128i xmm_alpha_lo, xmm_alpha_hi;
5719
5720 /* Align dst on a 16-byte boundary */
5721 while (w && ((unsigned long)pd & 15))
5722 {
5723 d = *pd;
5724 s = combine1 (ps + (vx >> 16), pm);
5725 vx += unit_x;
5726
5727 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
5728 if (pm)
5729 pm++;
5730 w--;
5731 }
5732
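	/* Four pixels per iteration: gather the four nearest source pixels,
	 * then store them directly if fully opaque, skip the write if fully
	 * transparent, or blend with OVER otherwise.
	 */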
5733 while (w >= 4)
5734 {
5735 __m128i tmp;
5736 uint32_t tmp1, tmp2, tmp3, tmp4;
5737
5738 tmp1 = ps[vx >> 16];
5739 vx += unit_x;
5740 tmp2 = ps[vx >> 16];
5741 vx += unit_x;
5742 tmp3 = ps[vx >> 16];
5743 vx += unit_x;
5744 tmp4 = ps[vx >> 16];
5745 vx += unit_x;
5746
5747 tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
5748
5749 xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
5750
5751 if (is_opaque (xmm_src_hi))
5752 {
5753 save_128_aligned ((__m128i*)pd, xmm_src_hi);
5754 }
5755 else if (!is_zero (xmm_src_hi))
5756 {
5757 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
5758
5759 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
5760 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
5761
5762 expand_alpha_2x128 (
5763 xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
5764
5765 over_2x128 (&xmm_src_lo, &xmm_src_hi,
5766 &xmm_alpha_lo, &xmm_alpha_hi,
5767 &xmm_dst_lo, &xmm_dst_hi);
5768
5769	    /* rebuild the 4 pixel data and save */
5770 save_128_aligned ((__m128i*)pd,
5771 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5772 }
5773
5774 w -= 4;
5775 pd += 4;
5776 if (pm)
5777 pm += 4;
5778 }
5779
5780 while (w)
5781 {
5782 d = *pd;
5783 s = combine1 (ps + (vx >> 16), pm);
5784 vx += unit_x;
5785
5786 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
5787 if (pm)
5788 pm++;
5789
5790 w--;
5791 }
5792 _mm_empty ();
5793}
5794
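/* Instantiate the nearest-neighbour scaling main loops around the scanline
 * function above, one per repeat mode (COVER, NONE and PAD); the
 * FAST_NEAREST_MAINLOOP macro is provided by pixman-fast-path.h.
 */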
5795FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
5796		       scaled_nearest_scanline_sse2_8888_8888_OVER,
5797		       uint32_t, uint32_t, COVER)
5798FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
5799		       scaled_nearest_scanline_sse2_8888_8888_OVER,
5800		       uint32_t, uint32_t, NONE)
5801FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
5802		       scaled_nearest_scanline_sse2_8888_8888_OVER,
5803		       uint32_t, uint32_t, PAD)
5804
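/* Table mapping (operator, source format, mask format, destination format)
 * combinations to the SSE2 composite routines implemented above; pixman
 * consults it when looking up a fast path.
 */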
5805static const pixman_fast_path_t sse2_fast_paths[] =
5806{
5807 /* PIXMAN_OP_OVER */
5808 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_r5g6b5, ((1 << 5) | (1 <<
1) | (1 << 6)), sse2_composite_over_n_8_0565 }
,
5809 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_b5g6r5, ((1 << 5) | (1 <<
1) | (1 << 6)), sse2_composite_over_n_8_0565 }
,
5810 PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888
}
,
5811 PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888
}
,
5812 PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_0565
}
,
5813 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_8888 }
,
5814 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_8888 }
,
5815 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_8888 }
,
5816 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_8888 }
,
5817 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_r5g6b5, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_8888_0565 }
,
5818 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_b5g6r5, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_over_8888_0565 }
,
5819 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
5820 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
5821 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
5822 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_over_n_8_8888
}
,
5823 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8
== (((0) << 24) | ((1) << 16) | ((0) << 12
) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_8888_8888_8888
}
,
5824 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
5825 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
5826 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
5827 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
5828 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
5829 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
5830 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
5831 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
5832 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
5833 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888),
5834 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888),
5835 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888),
5836 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888),
5837 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888),
5838 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888),
5839 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888),
5840 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca),
5841 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca),
5842 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca),
5843 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca),
5844 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca),
5845 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca),
5846 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888),
5847 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888),
5848 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888),
5849 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888),
5850 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565),
5851 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
5852 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
5853 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
5854
5855 /* PIXMAN_OP_OVER_REVERSE */
5856 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
5857 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
5858
5859 /* PIXMAN_OP_ADD */
5860 PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca),
5861 PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8),
5862 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
5863 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
5864 PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
5865 PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
5866
5867 /* PIXMAN_OP_SRC */
5868 PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
5869 PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
5870 PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
5871 PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
5872 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
5873 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
5874 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
5875 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
5876 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
5877 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
5878 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
5879 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
5880 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area),
5881 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area),
5882
5883 /* PIXMAN_OP_IN */
5884 PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
5885 PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
5886 PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
5887
5888 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
5889 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
5890 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
5891 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
5892 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
5893 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
5894 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
5895 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
5896 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
5897 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888),
5898 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
5899 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
5900
5901 { PIXMAN_OP_NONE },
5902};
5903
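/* Note: the blt wrapper below first tries the SSE2 blit; when
 * pixman_blt_sse2 () cannot handle the requested source/destination bpp
 * it returns FALSE and the operation is handed to the delegate
 * implementation further down the fallback chain. */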
5904static pixman_bool_t
5905sse2_blt (pixman_implementation_t *imp,
5906 uint32_t * src_bits,
5907 uint32_t * dst_bits,
5908 int src_stride,
5909 int dst_stride,
5910 int src_bpp,
5911 int dst_bpp,
5912 int src_x,
5913 int src_y,
5914 int dst_x,
5915 int dst_y,
5916 int width,
5917 int height)
5918{
5919 if (!pixman_blt_sse2 (
5920 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
5921 src_x, src_y, dst_x, dst_y, width, height))
5922
5923 {
5924 return _pixman_implementation_blt (
5925 imp->delegate,
5926 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
5927 src_x, src_y, dst_x, dst_y, width, height);
5928 }
5929
5930 return TRUE;
5931}
5932
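/* Note: on 32-bit GCC builds the attribute below realigns the stack on
 * function entry, so 16-byte-aligned SSE2 spill slots are safe even when
 * the caller only guarantees 4-byte stack alignment. */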
5933#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
5934__attribute__((__force_align_arg_pointer__))
5935#endif
5936static pixman_bool_t
5937sse2_fill (pixman_implementation_t *imp,
5938 uint32_t * bits,
5939 int stride,
5940 int bpp,
5941 int x,
5942 int y,
5943 int width,
5944 int height,
5945 uint32_t xor)
5946{
5947 if (!pixman_fill_sse2 (bits, stride, bpp, x, y, width, height, xor))
5948 {
5949 return _pixman_implementation_fill (
5950 imp->delegate, bits, stride, bpp, x, y, width, height, xor);
5951 }
5952
5953 return TRUE;
5954}
5955
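/* Note: the constructor below builds the SSE2 implementation on top of an
 * MMX (or plain fast-path) fallback, initializes the constant masks used
 * by the routines above, and hooks up the combiners plus the blt and fill
 * entry points. */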
5956#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
5957__attribute__((__force_align_arg_pointer__))
5958#endif
5959pixman_implementation_t *
5960_pixman_implementation_create_sse2 (void)
5961{
5962#ifdef USE_MMX
5963 pixman_implementation_t *fallback = _pixman_implementation_create_mmx ();
5964#else
5965 pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
5966#endif
5967 pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
5968
5969 /* SSE2 constants */
5970 mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
5971 mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
5972 mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
5973 mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f);
5974 mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000);
5975 mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
5976 mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8);
5977 mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
5978 mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000);
5979 mask_0080 = create_mask_16_128 (0x0080);
5980 mask_00ff = create_mask_16_128 (0x00ff);
5981 mask_0101 = create_mask_16_128 (0x0101);
5982 mask_ffff = create_mask_16_128 (0xffff);
5983 mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
5984 mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
5985
5986 /* MMX constants */
5987 mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f);
5988 mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840);
5989
5990 mask_x0080 = create_mask_16_64 (0x0080);
5991 mask_x00ff = create_mask_16_64 (0x00ff);
5992 mask_x0101 = create_mask_16_64 (0x0101);
5993 mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000);
5994
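/* Note: _mm_empty () below issues EMMS so that later x87 floating-point
 * code is not confused by MMX register state left over from the mask
 * setup above. */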
5995 _mm_empty ();
5996
5997 /* Set up function pointers */
5998
5999 /* SSE code patch for fbcompose.c */
6000 imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
6001 imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
6002 imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
6003 imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
6004 imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
6005 imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
6006 imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
6007 imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
6008 imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
6009 imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
6010
6011 imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
6012
6013 imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
6014 imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
6015 imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
6016 imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
6017 imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
6018 imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
6019 imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
6020 imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
6021 imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
6022 imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
6023 imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
6024
6025 imp->blt = sse2_blt;
6026 imp->fill = sse2_fill;
6027
6028 return imp;
6029}
6030
6031#endif /* USE_SSE2 */