Bug Summary

File: pixman/pixman-sse2.c
Location: line 5801, column 1
Description: Assigned value is garbage or undefined
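
Note: "Assigned value is garbage or undefined" is the analyzer's generic wording for an assignment whose right-hand side is read from a variable or memory location that was never initialized on some execution path. As a generic illustration of the pattern (this is not the pixman code flagged at line 5801), the checker fires on code like:

    uint32_t pixel;            /* never initialized */
    if (width > 0)
        pixel = *src;          /* only written on this path */
    *dst = pixel;              /* when width <= 0, the assigned value is garbage */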

Annotated Source Code

1/*
2 * Copyright © 2008 Rodrigo Kumpera
3 * Copyright © 2008 André Tupinambá
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of Red Hat not be used in advertising or
10 * publicity pertaining to distribution of the software without specific,
11 * written prior permission. Red Hat makes no representations about the
12 * suitability of this software for any purpose. It is provided "as is"
13 * without express or implied warranty.
14 *
15 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
16 * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17 * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
20 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
21 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
22 * SOFTWARE.
23 *
24 * Author: Rodrigo Kumpera (kumpera@gmail.com)
25 * André Tupinambá (andrelrt@gmail.com)
26 *
27 * Based on work by Owen Taylor and Søren Sandmann
28 */
29#ifdef HAVE_CONFIG_H
30#include <config.h>
31#endif
32
33#include <mmintrin.h>
34#include <xmmintrin.h> /* for _mm_shuffle_pi16 and _MM_SHUFFLE */
35#include <emmintrin.h> /* for SSE2 intrinsics */
36#include "pixman-private.h"
37#include "pixman-combine32.h"
38#include "pixman-fast-path.h"
39
40#if defined(_MSC_VER) && defined(_M_AMD64)
41/* Windows 64 doesn't allow MMX to be used, so
42 * the pixman-x64-mmx-emulation.h file contains
43 * implementations of those MMX intrinsics that
44 * are used in the SSE2 implementation.
45 */
46# include "pixman-x64-mmx-emulation.h"
47#endif
48
49#ifdef USE_SSE2
50
51/* --------------------------------------------------------------------
52 * Locals
53 */
54
55static __m64 mask_x0080;
56static __m64 mask_x00ff;
57static __m64 mask_x0101;
58static __m64 mask_x_alpha;
59
60static __m64 mask_x565_rgb;
61static __m64 mask_x565_unpack;
62
63static __m128i mask_0080;
64static __m128i mask_00ff;
65static __m128i mask_0101;
66static __m128i mask_ffff;
67static __m128i mask_ff000000;
68static __m128i mask_alpha;
69
70static __m128i mask_565_r;
71static __m128i mask_565_g1, mask_565_g2;
72static __m128i mask_565_b;
73static __m128i mask_red;
74static __m128i mask_green;
75static __m128i mask_blue;
76
77static __m128i mask_565_fix_rb;
78static __m128i mask_565_fix_g;
79
80/* ----------------------------------------------------------------------
81 * SSE2 Inlines
82 */
83static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
84unpack_32_1x128 (uint32_t data)
85{
86 return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
87}
88
89static force_inline__inline__ __attribute__ ((__always_inline__)) void
90unpack_128_2x128 (__m128i data, __m128i* data_lo, __m128i* data_hi)
91{
92 *data_lo = _mm_unpacklo_epi8 (data, _mm_setzero_si128 ());
93 *data_hi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
94}
95
96static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
97unpack_565_to_8888 (__m128i lo)
98{
99 __m128i r, g, b, rb, t;
100
101 r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
102 g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
103 b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
104
105 rb = _mm_or_si128 (r, b);
106 t = _mm_and_si128 (rb, mask_565_fix_rb);
107 t = _mm_srli_epi32 (t, 5);
108 rb = _mm_or_si128 (rb, t);
109
110 t = _mm_and_si128 (g, mask_565_fix_g);
111 t = _mm_srli_epi32 (t, 6);
112 g = _mm_or_si128 (g, t);
113
114 return _mm_or_si128 (rb, g);
115}
116
117static force_inline__inline__ __attribute__ ((__always_inline__)) void
118unpack_565_128_4x128 (__m128i data,
119 __m128i* data0,
120 __m128i* data1,
121 __m128i* data2,
122 __m128i* data3)
123{
124 __m128i lo, hi;
125
126 lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
127 hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
128
129 lo = unpack_565_to_8888 (lo);
130 hi = unpack_565_to_8888 (hi);
131
132 unpack_128_2x128 (lo, data0, data1);
133 unpack_128_2x128 (hi, data2, data3);
134}
135
136static force_inline__inline__ __attribute__ ((__always_inline__)) uint16_t
137pack_565_32_16 (uint32_t pixel)
138{
139 return (uint16_t) (((pixel >> 8) & 0xf800) |
140 ((pixel >> 5) & 0x07e0) |
141 ((pixel >> 3) & 0x001f));
142}
143
144static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
145pack_2x128_128 (__m128i lo, __m128i hi)
146{
147 return _mm_packus_epi16 (lo, hi);
148}
149
150static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
151pack_565_2x128_128 (__m128i lo, __m128i hi)
152{
153 __m128i data;
154 __m128i r, g1, g2, b;
155
156 data = pack_2x128_128 (lo, hi);
157
158 r = _mm_and_si128 (data, mask_565_r);
159 g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
160 g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
161 b = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
162
163 return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
164}
165
166static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
167pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
168{
169 return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
170 pack_565_2x128_128 (*xmm2, *xmm3));
171}
172
173static force_inline__inline__ __attribute__ ((__always_inline__)) int
174is_opaque (__m128i x)
175{
176 __m128i ffs = _mm_cmpeq_epi8 (x, x);
177
178 return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
179}
180
181static force_inline__inline__ __attribute__ ((__always_inline__)) int
182is_zero (__m128i x)
183{
184 return _mm_movemask_epi8 (
185 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
186}
187
188static force_inline__inline__ __attribute__ ((__always_inline__)) int
189is_transparent (__m128i x)
190{
191 return (_mm_movemask_epi8 (
192 _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
193}
194
195static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
196expand_pixel_32_1x128 (uint32_t data)
197{
 198 return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
199}
200
201static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
202expand_alpha_1x128 (__m128i data)
203{
 204 return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
 205 _MM_SHUFFLE (3, 3, 3, 3)),
 206 _MM_SHUFFLE (3, 3, 3, 3));
207}
208
209static force_inline__inline__ __attribute__ ((__always_inline__)) void
210expand_alpha_2x128 (__m128i data_lo,
211 __m128i data_hi,
212 __m128i* alpha_lo,
213 __m128i* alpha_hi)
214{
215 __m128i lo, hi;
216
 217 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
 218 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
 219
 220 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
 221 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
222}
223
224static force_inline__inline__ __attribute__ ((__always_inline__)) void
225expand_alpha_rev_2x128 (__m128i data_lo,
226 __m128i data_hi,
227 __m128i* alpha_lo,
228 __m128i* alpha_hi)
229{
230 __m128i lo, hi;
231
 232 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
 233 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
 234 *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
 235 *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
236}
237
238static force_inline__inline__ __attribute__ ((__always_inline__)) void
239pix_multiply_2x128 (__m128i* data_lo,
240 __m128i* data_hi,
241 __m128i* alpha_lo,
242 __m128i* alpha_hi,
243 __m128i* ret_lo,
244 __m128i* ret_hi)
245{
246 __m128i lo, hi;
247
248 lo = _mm_mullo_epi16 (*data_lo, *alpha_lo);
249 hi = _mm_mullo_epi16 (*data_hi, *alpha_hi);
250 lo = _mm_adds_epu16 (lo, mask_0080);
251 hi = _mm_adds_epu16 (hi, mask_0080);
252 *ret_lo = _mm_mulhi_epu16 (lo, mask_0101);
253 *ret_hi = _mm_mulhi_epu16 (hi, mask_0101);
254}
255
256static force_inline__inline__ __attribute__ ((__always_inline__)) void
257pix_add_multiply_2x128 (__m128i* src_lo,
258 __m128i* src_hi,
259 __m128i* alpha_dst_lo,
260 __m128i* alpha_dst_hi,
261 __m128i* dst_lo,
262 __m128i* dst_hi,
263 __m128i* alpha_src_lo,
264 __m128i* alpha_src_hi,
265 __m128i* ret_lo,
266 __m128i* ret_hi)
267{
268 __m128i t1_lo, t1_hi;
269 __m128i t2_lo, t2_hi;
270
271 pix_multiply_2x128 (src_lo, src_hi, alpha_dst_lo, alpha_dst_hi, &t1_lo, &t1_hi);
272 pix_multiply_2x128 (dst_lo, dst_hi, alpha_src_lo, alpha_src_hi, &t2_lo, &t2_hi);
273
274 *ret_lo = _mm_adds_epu8 (t1_lo, t2_lo);
275 *ret_hi = _mm_adds_epu8 (t1_hi, t2_hi);
276}
277
278static force_inline__inline__ __attribute__ ((__always_inline__)) void
279negate_2x128 (__m128i data_lo,
280 __m128i data_hi,
281 __m128i* neg_lo,
282 __m128i* neg_hi)
283{
284 *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
285 *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
286}
287
288static force_inline__inline__ __attribute__ ((__always_inline__)) void
289invert_colors_2x128 (__m128i data_lo,
290 __m128i data_hi,
291 __m128i* inv_lo,
292 __m128i* inv_hi)
293{
294 __m128i lo, hi;
295
 296 lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
 297 hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
 298 *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
 299 *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
300}
301
302static force_inline__inline__ __attribute__ ((__always_inline__)) void
303over_2x128 (__m128i* src_lo,
304 __m128i* src_hi,
305 __m128i* alpha_lo,
306 __m128i* alpha_hi,
307 __m128i* dst_lo,
308 __m128i* dst_hi)
309{
310 __m128i t1, t2;
311
312 negate_2x128 (*alpha_lo, *alpha_hi, &t1, &t2);
313
314 pix_multiply_2x128 (dst_lo, dst_hi, &t1, &t2, dst_lo, dst_hi);
315
316 *dst_lo = _mm_adds_epu8 (*src_lo, *dst_lo);
317 *dst_hi = _mm_adds_epu8 (*src_hi, *dst_hi);
318}
319
320static force_inline__inline__ __attribute__ ((__always_inline__)) void
321over_rev_non_pre_2x128 (__m128i src_lo,
322 __m128i src_hi,
323 __m128i* dst_lo,
324 __m128i* dst_hi)
325{
326 __m128i lo, hi;
327 __m128i alpha_lo, alpha_hi;
328
329 expand_alpha_2x128 (src_lo, src_hi, &alpha_lo, &alpha_hi);
330
331 lo = _mm_or_si128 (alpha_lo, mask_alpha);
332 hi = _mm_or_si128 (alpha_hi, mask_alpha);
333
334 invert_colors_2x128 (src_lo, src_hi, &src_lo, &src_hi);
335
336 pix_multiply_2x128 (&src_lo, &src_hi, &lo, &hi, &lo, &hi);
337
338 over_2x128 (&lo, &hi, &alpha_lo, &alpha_hi, dst_lo, dst_hi);
339}
340
341static force_inline__inline__ __attribute__ ((__always_inline__)) void
342in_over_2x128 (__m128i* src_lo,
343 __m128i* src_hi,
344 __m128i* alpha_lo,
345 __m128i* alpha_hi,
346 __m128i* mask_lo,
347 __m128i* mask_hi,
348 __m128i* dst_lo,
349 __m128i* dst_hi)
350{
351 __m128i s_lo, s_hi;
352 __m128i a_lo, a_hi;
353
354 pix_multiply_2x128 (src_lo, src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
355 pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
356
357 over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
358}
359
360/* load 4 pixels from a 16-byte boundary aligned address */
361static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
362load_128_aligned (__m128i* src)
363{
364 return _mm_load_si128 (src);
365}
366
 367/* load 4 pixels from an unaligned address */
368static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
369load_128_unaligned (const __m128i* src)
370{
371 return _mm_loadu_si128 (src);
372}
373
374/* save 4 pixels using Write Combining memory on a 16-byte
375 * boundary aligned address
376 */
377static force_inline__inline__ __attribute__ ((__always_inline__)) void
378save_128_write_combining (__m128i* dst,
379 __m128i data)
380{
381 _mm_stream_si128 (dst, data);
382}
383
384/* save 4 pixels on a 16-byte boundary aligned address */
385static force_inline__inline__ __attribute__ ((__always_inline__)) void
386save_128_aligned (__m128i* dst,
387 __m128i data)
388{
389 _mm_store_si128 (dst, data);
390}
391
 392/* save 4 pixels on an unaligned address */
393static force_inline__inline__ __attribute__ ((__always_inline__)) void
394save_128_unaligned (__m128i* dst,
395 __m128i data)
396{
397 _mm_storeu_si128 (dst, data);
398}
399
400/* ------------------------------------------------------------------
401 * MMX inlines
402 */
403
404static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
405load_32_1x64 (uint32_t data)
406{
407 return _mm_cvtsi32_si64 (data);
408}
409
410static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
411unpack_32_1x64 (uint32_t data)
412{
413 return _mm_unpacklo_pi8 (load_32_1x64 (data), _mm_setzero_si64 ());
414}
415
416static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
417expand_alpha_1x64 (__m64 data)
418{
 419 return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3));
420}
421
422static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
423expand_alpha_rev_1x64 (__m64 data)
424{
 425 return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
426}
427
428static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
429expand_pixel_8_1x64 (uint8_t data)
430{
 431 return _mm_shuffle_pi16 (
 432 unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
433}
434
435static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
436pix_multiply_1x64 (__m64 data,
437 __m64 alpha)
438{
439 return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha),
440 mask_x0080),
441 mask_x0101);
442}
443
444static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
445pix_add_multiply_1x64 (__m64* src,
446 __m64* alpha_dst,
447 __m64* dst,
448 __m64* alpha_src)
449{
450 __m64 t1 = pix_multiply_1x64 (*src, *alpha_dst);
451 __m64 t2 = pix_multiply_1x64 (*dst, *alpha_src);
452
453 return _mm_adds_pu8 (t1, t2);
454}
455
456static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
457negate_1x64 (__m64 data)
458{
459 return _mm_xor_si64 (data, mask_x00ff);
460}
461
462static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
463invert_colors_1x64 (__m64 data)
464{
 465 return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
466}
467
468static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
469over_1x64 (__m64 src, __m64 alpha, __m64 dst)
470{
471 return _mm_adds_pu8 (src, pix_multiply_1x64 (dst, negate_1x64 (alpha)));
472}
473
474static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
475in_over_1x64 (__m64* src, __m64* alpha, __m64* mask, __m64* dst)
476{
477 return over_1x64 (pix_multiply_1x64 (*src, *mask),
478 pix_multiply_1x64 (*alpha, *mask),
479 *dst);
480}
481
482static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
483over_rev_non_pre_1x64 (__m64 src, __m64 dst)
484{
485 __m64 alpha = expand_alpha_1x64 (src);
486
487 return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src),
488 _mm_or_si64 (alpha, mask_x_alpha)),
489 alpha,
490 dst);
491}
492
493static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
494pack_1x64_32 (__m64 data)
495{
496 return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ()));
497}
498
 499/* Expand 16 bits positioned at @pos (0-3) of an MMX register into
500 *
501 * 00RR00GG00BB
502 *
503 * --- Expanding 565 in the low word ---
504 *
505 * m = (m << (32 - 3)) | (m << (16 - 5)) | m;
506 * m = m & (01f0003f001f);
507 * m = m * (008404100840);
508 * m = m >> 8;
509 *
510 * Note the trick here - the top word is shifted by another nibble to
511 * avoid it bumping into the middle word
512 */
513static force_inline__inline__ __attribute__ ((__always_inline__)) __m64
514expand565_16_1x64 (uint16_t pixel)
515{
516 __m64 p;
517 __m64 t1, t2;
518
519 p = _mm_cvtsi32_si64 ((uint32_t) pixel);
520
521 t1 = _mm_slli_si64 (p, 36 - 11);
522 t2 = _mm_slli_si64 (p, 16 - 5);
523
524 p = _mm_or_si64 (t1, p);
525 p = _mm_or_si64 (t2, p);
526 p = _mm_and_si64 (p, mask_x565_rgb);
527 p = _mm_mullo_pi16 (p, mask_x565_unpack);
528
529 return _mm_srli_pi16 (p, 8);
530}
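
/* For reference only (not part of pixman-sse2.c): a plain scalar version of
 * the same 565 -> 8888 widening. It replicates the top bits of each channel,
 * which is what the mask/multiply trick above computes; the helper name
 * expand565_scalar is hypothetical and used purely for illustration.
 */
static uint32_t
expand565_scalar (uint16_t p)
{
    uint32_t r = (p >> 11) & 0x1f;
    uint32_t g = (p >> 5) & 0x3f;
    uint32_t b = p & 0x1f;

    r = (r << 3) | (r >> 2);    /* 5 -> 8 bits */
    g = (g << 2) | (g >> 4);    /* 6 -> 8 bits */
    b = (b << 3) | (b >> 2);    /* 5 -> 8 bits */

    return (r << 16) | (g << 8) | b;    /* packed 0x00RRGGBB */
}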
531
532/* ----------------------------------------------------------------------------
533 * Compose Core transformations
534 */
535static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
536core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
537{
538 uint8_t a;
539 __m64 ms;
540
541 a = src >> 24;
542
543 if (a == 0xff)
544 {
545 return src;
546 }
547 else if (src)
548 {
549 ms = unpack_32_1x64 (src);
550 return pack_1x64_32 (
551 over_1x64 (ms, expand_alpha_1x64 (ms), unpack_32_1x64 (dst)));
552 }
553
554 return dst;
555}
556
557static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
558combine1 (const uint32_t *ps, const uint32_t *pm)
559{
560 uint32_t s = *ps;
561
562 if (pm)
563 {
564 __m64 ms, mm;
565
566 mm = unpack_32_1x64 (*pm);
567 mm = expand_alpha_1x64 (mm);
568
569 ms = unpack_32_1x64 (s);
570 ms = pix_multiply_1x64 (ms, mm);
571
572 s = pack_1x64_32 (ms);
573 }
574
575 return s;
576}
577
578static force_inline__inline__ __attribute__ ((__always_inline__)) __m128i
579combine4 (const __m128i *ps, const __m128i *pm)
580{
581 __m128i xmm_src_lo, xmm_src_hi;
582 __m128i xmm_msk_lo, xmm_msk_hi;
583 __m128i s;
584
585 if (pm)
586 {
587 xmm_msk_lo = load_128_unaligned (pm);
588
589 if (is_transparent (xmm_msk_lo))
590 return _mm_setzero_si128 ();
591 }
592
593 s = load_128_unaligned (ps);
594
595 if (pm)
596 {
597 unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
598 unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
599
600 expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
601
602 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
603 &xmm_msk_lo, &xmm_msk_hi,
604 &xmm_src_lo, &xmm_src_hi);
605
606 s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
607 }
608
609 return s;
610}
611
612static force_inline__inline__ __attribute__ ((__always_inline__)) void
613core_combine_over_u_sse2 (uint32_t* pd,
614 const uint32_t* ps,
615 const uint32_t* pm,
616 int w)
617{
618 uint32_t s, d;
619
620 __m128i xmm_dst_lo, xmm_dst_hi;
621 __m128i xmm_src_lo, xmm_src_hi;
622 __m128i xmm_alpha_lo, xmm_alpha_hi;
623
624 /* Align dst on a 16-byte boundary */
625 while (w && ((unsigned long)pd & 15))
626 {
627 d = *pd;
628 s = combine1 (ps, pm);
629
630 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
631 ps++;
632 if (pm)
633 pm++;
634 w--;
635 }
636
637 while (w >= 4)
638 {
639 /* I'm loading unaligned because I'm not sure about
640 * the address alignment.
641 */
642 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
643
644 if (is_opaque (xmm_src_hi))
645 {
646 save_128_aligned ((__m128i*)pd, xmm_src_hi);
647 }
648 else if (!is_zero (xmm_src_hi))
649 {
650 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
651
652 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
653 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
654
655 expand_alpha_2x128 (
656 xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
657
658 over_2x128 (&xmm_src_lo, &xmm_src_hi,
659 &xmm_alpha_lo, &xmm_alpha_hi,
660 &xmm_dst_lo, &xmm_dst_hi);
661
 662 /* rebuild the 4 pixel data and save */
663 save_128_aligned ((__m128i*)pd,
664 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
665 }
666
667 w -= 4;
668 ps += 4;
669 pd += 4;
670 if (pm)
671 pm += 4;
672 }
673
674 while (w)
675 {
676 d = *pd;
677 s = combine1 (ps, pm);
678
679 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
680 ps++;
681 if (pm)
682 pm++;
683
684 w--;
685 }
686}
687
688static force_inline__inline__ __attribute__ ((__always_inline__)) void
689core_combine_over_reverse_u_sse2 (uint32_t* pd,
690 const uint32_t* ps,
691 const uint32_t* pm,
692 int w)
693{
694 uint32_t s, d;
695
696 __m128i xmm_dst_lo, xmm_dst_hi;
697 __m128i xmm_src_lo, xmm_src_hi;
698 __m128i xmm_alpha_lo, xmm_alpha_hi;
699
700 /* Align dst on a 16-byte boundary */
701 while (w &&
702 ((unsigned long)pd & 15))
703 {
704 d = *pd;
705 s = combine1 (ps, pm);
706
707 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
708 w--;
709 ps++;
710 if (pm)
711 pm++;
712 }
713
714 while (w >= 4)
715 {
716 /* I'm loading unaligned because I'm not sure
717 * about the address alignment.
718 */
719 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
720 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
721
722 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
723 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
724
725 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
726 &xmm_alpha_lo, &xmm_alpha_hi);
727
728 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
729 &xmm_alpha_lo, &xmm_alpha_hi,
730 &xmm_src_lo, &xmm_src_hi);
731
 732 /* rebuild the 4 pixel data and save */
733 save_128_aligned ((__m128i*)pd,
734 pack_2x128_128 (xmm_src_lo, xmm_src_hi));
735
736 w -= 4;
737 ps += 4;
738 pd += 4;
739
740 if (pm)
741 pm += 4;
742 }
743
744 while (w)
745 {
746 d = *pd;
747 s = combine1 (ps, pm);
748
749 *pd++ = core_combine_over_u_pixel_sse2 (d, s);
750 ps++;
751 w--;
752 if (pm)
753 pm++;
754 }
755}
756
757static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
758core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst)
759{
760 uint32_t maska = src >> 24;
761
762 if (maska == 0)
763 {
764 return 0;
765 }
766 else if (maska != 0xff)
767 {
768 return pack_1x64_32 (
769 pix_multiply_1x64 (unpack_32_1x64 (dst),
770 expand_alpha_1x64 (unpack_32_1x64 (src))));
771 }
772
773 return dst;
774}
775
776static force_inline__inline__ __attribute__ ((__always_inline__)) void
777core_combine_in_u_sse2 (uint32_t* pd,
778 const uint32_t* ps,
779 const uint32_t* pm,
780 int w)
781{
782 uint32_t s, d;
783
784 __m128i xmm_src_lo, xmm_src_hi;
785 __m128i xmm_dst_lo, xmm_dst_hi;
786
787 while (w && ((unsigned long) pd & 15))
788 {
789 s = combine1 (ps, pm);
790 d = *pd;
791
792 *pd++ = core_combine_in_u_pixelsse2 (d, s);
793 w--;
794 ps++;
795 if (pm)
796 pm++;
797 }
798
799 while (w >= 4)
800 {
801 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
802 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
803
804 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
805 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
806
807 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
808 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
809 &xmm_dst_lo, &xmm_dst_hi,
810 &xmm_dst_lo, &xmm_dst_hi);
811
812 save_128_aligned ((__m128i*)pd,
813 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
814
815 ps += 4;
816 pd += 4;
817 w -= 4;
818 if (pm)
819 pm += 4;
820 }
821
822 while (w)
823 {
824 s = combine1 (ps, pm);
825 d = *pd;
826
827 *pd++ = core_combine_in_u_pixelsse2 (d, s);
828 w--;
829 ps++;
830 if (pm)
831 pm++;
832 }
833}
834
835static force_inline__inline__ __attribute__ ((__always_inline__)) void
836core_combine_reverse_in_u_sse2 (uint32_t* pd,
837 const uint32_t* ps,
838 const uint32_t *pm,
839 int w)
840{
841 uint32_t s, d;
842
843 __m128i xmm_src_lo, xmm_src_hi;
844 __m128i xmm_dst_lo, xmm_dst_hi;
845
846 while (w && ((unsigned long) pd & 15))
847 {
848 s = combine1 (ps, pm);
849 d = *pd;
850
851 *pd++ = core_combine_in_u_pixelsse2 (s, d);
852 ps++;
853 w--;
854 if (pm)
855 pm++;
856 }
857
858 while (w >= 4)
859 {
860 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
861 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
862
863 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
864 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
865
866 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
867 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
868 &xmm_src_lo, &xmm_src_hi,
869 &xmm_dst_lo, &xmm_dst_hi);
870
871 save_128_aligned (
872 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
873
874 ps += 4;
875 pd += 4;
876 w -= 4;
877 if (pm)
878 pm += 4;
879 }
880
881 while (w)
882 {
883 s = combine1 (ps, pm);
884 d = *pd;
885
886 *pd++ = core_combine_in_u_pixelsse2 (s, d);
887 w--;
888 ps++;
889 if (pm)
890 pm++;
891 }
892}
893
894static force_inline__inline__ __attribute__ ((__always_inline__)) void
895core_combine_reverse_out_u_sse2 (uint32_t* pd,
896 const uint32_t* ps,
897 const uint32_t* pm,
898 int w)
899{
900 while (w && ((unsigned long) pd & 15))
901 {
902 uint32_t s = combine1 (ps, pm);
903 uint32_t d = *pd;
904
905 *pd++ = pack_1x64_32 (
906 pix_multiply_1x64 (
907 unpack_32_1x64 (d), negate_1x64 (
908 expand_alpha_1x64 (unpack_32_1x64 (s)))));
909
910 if (pm)
911 pm++;
912 ps++;
913 w--;
914 }
915
916 while (w >= 4)
917 {
918 __m128i xmm_src_lo, xmm_src_hi;
919 __m128i xmm_dst_lo, xmm_dst_hi;
920
921 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
922 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
923
924 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
925 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
926
927 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
928 negate_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
929
930 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
931 &xmm_src_lo, &xmm_src_hi,
932 &xmm_dst_lo, &xmm_dst_hi);
933
934 save_128_aligned (
935 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
936
937 ps += 4;
938 pd += 4;
939 if (pm)
940 pm += 4;
941
942 w -= 4;
943 }
944
945 while (w)
946 {
947 uint32_t s = combine1 (ps, pm);
948 uint32_t d = *pd;
949
950 *pd++ = pack_1x64_32 (
951 pix_multiply_1x64 (
952 unpack_32_1x64 (d), negate_1x64 (
953 expand_alpha_1x64 (unpack_32_1x64 (s)))));
954 ps++;
955 if (pm)
956 pm++;
957 w--;
958 }
959}
960
961static force_inline__inline__ __attribute__ ((__always_inline__)) void
962core_combine_out_u_sse2 (uint32_t* pd,
963 const uint32_t* ps,
964 const uint32_t* pm,
965 int w)
966{
967 while (w && ((unsigned long) pd & 15))
968 {
969 uint32_t s = combine1 (ps, pm);
970 uint32_t d = *pd;
971
972 *pd++ = pack_1x64_32 (
973 pix_multiply_1x64 (
974 unpack_32_1x64 (s), negate_1x64 (
975 expand_alpha_1x64 (unpack_32_1x64 (d)))));
976 w--;
977 ps++;
978 if (pm)
979 pm++;
980 }
981
982 while (w >= 4)
983 {
984 __m128i xmm_src_lo, xmm_src_hi;
985 __m128i xmm_dst_lo, xmm_dst_hi;
986
987 xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
988 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
989
990 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
991 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
992
993 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
994 negate_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
995
996 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
997 &xmm_dst_lo, &xmm_dst_hi,
998 &xmm_dst_lo, &xmm_dst_hi);
999
1000 save_128_aligned (
1001 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1002
1003 ps += 4;
1004 pd += 4;
1005 w -= 4;
1006 if (pm)
1007 pm += 4;
1008 }
1009
1010 while (w)
1011 {
1012 uint32_t s = combine1 (ps, pm);
1013 uint32_t d = *pd;
1014
1015 *pd++ = pack_1x64_32 (
1016 pix_multiply_1x64 (
1017 unpack_32_1x64 (s), negate_1x64 (
1018 expand_alpha_1x64 (unpack_32_1x64 (d)))));
1019 w--;
1020 ps++;
1021 if (pm)
1022 pm++;
1023 }
1024}
1025
1026static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1027core_combine_atop_u_pixel_sse2 (uint32_t src,
1028 uint32_t dst)
1029{
1030 __m64 s = unpack_32_1x64 (src);
1031 __m64 d = unpack_32_1x64 (dst);
1032
1033 __m64 sa = negate_1x64 (expand_alpha_1x64 (s));
1034 __m64 da = expand_alpha_1x64 (d);
1035
1036 return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
1037}
1038
1039static force_inline__inline__ __attribute__ ((__always_inline__)) void
1040core_combine_atop_u_sse2 (uint32_t* pd,
1041 const uint32_t* ps,
1042 const uint32_t* pm,
1043 int w)
1044{
1045 uint32_t s, d;
1046
1047 __m128i xmm_src_lo, xmm_src_hi;
1048 __m128i xmm_dst_lo, xmm_dst_hi;
1049 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1050 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1051
1052 while (w && ((unsigned long) pd & 15))
1053 {
1054 s = combine1 (ps, pm);
1055 d = *pd;
1056
1057 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1058 w--;
1059 ps++;
1060 if (pm)
1061 pm++;
1062 }
1063
1064 while (w >= 4)
1065 {
1066 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1067 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1068
1069 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1070 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1071
1072 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1073 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1074 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1075 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1076
1077 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1078 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1079
1080 pix_add_multiply_2x128 (
1081 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1082 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1083 &xmm_dst_lo, &xmm_dst_hi);
1084
1085 save_128_aligned (
1086 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1087
1088 ps += 4;
1089 pd += 4;
1090 w -= 4;
1091 if (pm)
1092 pm += 4;
1093 }
1094
1095 while (w)
1096 {
1097 s = combine1 (ps, pm);
1098 d = *pd;
1099
1100 *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
1101 w--;
1102 ps++;
1103 if (pm)
1104 pm++;
1105 }
1106}
1107
1108static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1109core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
1110 uint32_t dst)
1111{
1112 __m64 s = unpack_32_1x64 (src);
1113 __m64 d = unpack_32_1x64 (dst);
1114
1115 __m64 sa = expand_alpha_1x64 (s);
1116 __m64 da = negate_1x64 (expand_alpha_1x64 (d));
1117
1118 return pack_1x64_32 (pix_add_multiply_1x64 (&s, &da, &d, &sa));
1119}
1120
1121static force_inline__inline__ __attribute__ ((__always_inline__)) void
1122core_combine_reverse_atop_u_sse2 (uint32_t* pd,
1123 const uint32_t* ps,
1124 const uint32_t* pm,
1125 int w)
1126{
1127 uint32_t s, d;
1128
1129 __m128i xmm_src_lo, xmm_src_hi;
1130 __m128i xmm_dst_lo, xmm_dst_hi;
1131 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1132 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1133
1134 while (w && ((unsigned long) pd & 15))
1135 {
1136 s = combine1 (ps, pm);
1137 d = *pd;
1138
1139 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1140 ps++;
1141 w--;
1142 if (pm)
1143 pm++;
1144 }
1145
1146 while (w >= 4)
1147 {
1148 xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
1149 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
1150
1151 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1152 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1153
1154 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1155 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1156 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1157 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1158
1159 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1160 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1161
1162 pix_add_multiply_2x128 (
1163 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1164 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1165 &xmm_dst_lo, &xmm_dst_hi);
1166
1167 save_128_aligned (
1168 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1169
1170 ps += 4;
1171 pd += 4;
1172 w -= 4;
1173 if (pm)
1174 pm += 4;
1175 }
1176
1177 while (w)
1178 {
1179 s = combine1 (ps, pm);
1180 d = *pd;
1181
1182 *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
1183 ps++;
1184 w--;
1185 if (pm)
1186 pm++;
1187 }
1188}
1189
1190static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1191core_combine_xor_u_pixel_sse2 (uint32_t src,
1192 uint32_t dst)
1193{
1194 __m64 s = unpack_32_1x64 (src);
1195 __m64 d = unpack_32_1x64 (dst);
1196
1197 __m64 neg_d = negate_1x64 (expand_alpha_1x64 (d));
1198 __m64 neg_s = negate_1x64 (expand_alpha_1x64 (s));
1199
1200 return pack_1x64_32 (pix_add_multiply_1x64 (&s, &neg_d, &d, &neg_s));
1201}
1202
1203static force_inline__inline__ __attribute__ ((__always_inline__)) void
1204core_combine_xor_u_sse2 (uint32_t* dst,
1205 const uint32_t* src,
1206 const uint32_t *mask,
1207 int width)
1208{
1209 int w = width;
1210 uint32_t s, d;
1211 uint32_t* pd = dst;
1212 const uint32_t* ps = src;
1213 const uint32_t* pm = mask;
1214
1215 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
1216 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
1217 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1218 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1219
1220 while (w && ((unsigned long) pd & 15))
1221 {
1222 s = combine1 (ps, pm);
1223 d = *pd;
1224
1225 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1226 w--;
1227 ps++;
1228 if (pm)
1229 pm++;
1230 }
1231
1232 while (w >= 4)
1233 {
1234 xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
1235 xmm_dst = load_128_aligned ((__m128i*) pd);
1236
1237 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
1238 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
1239
1240 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1241 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1242 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1243 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1244
1245 negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
1246 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1247 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
1248 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1249
1250 pix_add_multiply_2x128 (
1251 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
1252 &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1253 &xmm_dst_lo, &xmm_dst_hi);
1254
1255 save_128_aligned (
1256 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1257
1258 ps += 4;
1259 pd += 4;
1260 w -= 4;
1261 if (pm)
1262 pm += 4;
1263 }
1264
1265 while (w)
1266 {
1267 s = combine1 (ps, pm);
1268 d = *pd;
1269
1270 *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
1271 w--;
1272 ps++;
1273 if (pm)
1274 pm++;
1275 }
1276}
1277
1278static force_inline__inline__ __attribute__ ((__always_inline__)) void
1279core_combine_add_u_sse2 (uint32_t* dst,
1280 const uint32_t* src,
1281 const uint32_t* mask,
1282 int width)
1283{
1284 int w = width;
1285 uint32_t s, d;
1286 uint32_t* pd = dst;
1287 const uint32_t* ps = src;
1288 const uint32_t* pm = mask;
1289
1290 while (w && (unsigned long)pd & 15)
1291 {
1292 s = combine1 (ps, pm);
1293 d = *pd;
1294
1295 ps++;
1296 if (pm)
1297 pm++;
1298 *pd++ = _mm_cvtsi64_si32 (
1299 _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
1300 w--;
1301 }
1302
1303 while (w >= 4)
1304 {
1305 __m128i s;
1306
1307 s = combine4 ((__m128i*)ps, (__m128i*)pm);
1308
1309 save_128_aligned (
1310 (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned ((__m128i*)pd)));
1311
1312 pd += 4;
1313 ps += 4;
1314 if (pm)
1315 pm += 4;
1316 w -= 4;
1317 }
1318
1319 while (w--)
1320 {
1321 s = combine1 (ps, pm);
1322 d = *pd;
1323
1324 ps++;
1325 *pd++ = _mm_cvtsi64_si32 (
1326 _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
1327 if (pm)
1328 pm++;
1329 }
1330}
1331
1332static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1333core_combine_saturate_u_pixel_sse2 (uint32_t src,
1334 uint32_t dst)
1335{
1336 __m64 ms = unpack_32_1x64 (src);
1337 __m64 md = unpack_32_1x64 (dst);
1338 uint32_t sa = src >> 24;
1339 uint32_t da = ~dst >> 24;
1340
1341 if (sa > da)
1342 {
1343 ms = pix_multiply_1x64 (
 1344 ms, expand_alpha_1x64 (unpack_32_1x64 (DIV_UN8 (da, sa) << 24)));
1345 }
1346
1347 return pack_1x64_32 (_mm_adds_pu16 (md, ms));
1348}
1349
1350static force_inline__inline__ __attribute__ ((__always_inline__)) void
1351core_combine_saturate_u_sse2 (uint32_t * pd,
1352 const uint32_t *ps,
1353 const uint32_t *pm,
1354 int w)
1355{
1356 uint32_t s, d;
1357
1358 uint32_t pack_cmp;
1359 __m128i xmm_src, xmm_dst;
1360
1361 while (w && (unsigned long)pd & 15)
1362 {
1363 s = combine1 (ps, pm);
1364 d = *pd;
1365
1366 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1367 w--;
1368 ps++;
1369 if (pm)
1370 pm++;
1371 }
1372
1373 while (w >= 4)
1374 {
1375 xmm_dst = load_128_aligned ((__m128i*)pd);
1376 xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
1377
1378 pack_cmp = _mm_movemask_epi8 (
1379 _mm_cmpgt_epi32 (
1380 _mm_srli_epi32 (xmm_src, 24),
1381 _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
1382
 1383 /* if some alpha src is greater than the respective ~alpha dst */
1384 if (pack_cmp)
1385 {
1386 s = combine1 (ps++, pm);
1387 d = *pd;
1388 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1389 if (pm)
1390 pm++;
1391
1392 s = combine1 (ps++, pm);
1393 d = *pd;
1394 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1395 if (pm)
1396 pm++;
1397
1398 s = combine1 (ps++, pm);
1399 d = *pd;
1400 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1401 if (pm)
1402 pm++;
1403
1404 s = combine1 (ps++, pm);
1405 d = *pd;
1406 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1407 if (pm)
1408 pm++;
1409 }
1410 else
1411 {
1412 save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
1413
1414 pd += 4;
1415 ps += 4;
1416 if (pm)
1417 pm += 4;
1418 }
1419
1420 w -= 4;
1421 }
1422
1423 while (w--)
1424 {
1425 s = combine1 (ps, pm);
1426 d = *pd;
1427
1428 *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
1429 ps++;
1430 if (pm)
1431 pm++;
1432 }
1433}
1434
1435static force_inline__inline__ __attribute__ ((__always_inline__)) void
1436core_combine_src_ca_sse2 (uint32_t* pd,
1437 const uint32_t* ps,
1438 const uint32_t *pm,
1439 int w)
1440{
1441 uint32_t s, m;
1442
1443 __m128i xmm_src_lo, xmm_src_hi;
1444 __m128i xmm_mask_lo, xmm_mask_hi;
1445 __m128i xmm_dst_lo, xmm_dst_hi;
1446
1447 while (w && (unsigned long)pd & 15)
1448 {
1449 s = *ps++;
1450 m = *pm++;
1451 *pd++ = pack_1x64_32 (
1452 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
1453 w--;
1454 }
1455
1456 while (w >= 4)
1457 {
1458 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1459 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1460
1461 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1462 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1463
1464 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1465 &xmm_mask_lo, &xmm_mask_hi,
1466 &xmm_dst_lo, &xmm_dst_hi);
1467
1468 save_128_aligned (
1469 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1470
1471 ps += 4;
1472 pd += 4;
1473 pm += 4;
1474 w -= 4;
1475 }
1476
1477 while (w)
1478 {
1479 s = *ps++;
1480 m = *pm++;
1481 *pd++ = pack_1x64_32 (
1482 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
1483 w--;
1484 }
1485}
1486
1487static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1488core_combine_over_ca_pixel_sse2 (uint32_t src,
1489 uint32_t mask,
1490 uint32_t dst)
1491{
1492 __m64 s = unpack_32_1x64 (src);
1493 __m64 expAlpha = expand_alpha_1x64 (s);
1494 __m64 unpk_mask = unpack_32_1x64 (mask);
1495 __m64 unpk_dst = unpack_32_1x64 (dst);
1496
1497 return pack_1x64_32 (in_over_1x64 (&s, &expAlpha, &unpk_mask, &unpk_dst));
1498}
1499
1500static force_inline__inline__ __attribute__ ((__always_inline__)) void
1501core_combine_over_ca_sse2 (uint32_t* pd,
1502 const uint32_t* ps,
1503 const uint32_t *pm,
1504 int w)
1505{
1506 uint32_t s, m, d;
1507
1508 __m128i xmm_alpha_lo, xmm_alpha_hi;
1509 __m128i xmm_src_lo, xmm_src_hi;
1510 __m128i xmm_dst_lo, xmm_dst_hi;
1511 __m128i xmm_mask_lo, xmm_mask_hi;
1512
1513 while (w && (unsigned long)pd & 15)
1514 {
1515 s = *ps++;
1516 m = *pm++;
1517 d = *pd;
1518
1519 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1520 w--;
1521 }
1522
1523 while (w >= 4)
1524 {
1525 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1526 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1527 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1528
1529 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1530 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1531 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1532
1533 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1534 &xmm_alpha_lo, &xmm_alpha_hi);
1535
1536 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
1537 &xmm_alpha_lo, &xmm_alpha_hi,
1538 &xmm_mask_lo, &xmm_mask_hi,
1539 &xmm_dst_lo, &xmm_dst_hi);
1540
1541 save_128_aligned (
1542 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1543
1544 ps += 4;
1545 pd += 4;
1546 pm += 4;
1547 w -= 4;
1548 }
1549
1550 while (w)
1551 {
1552 s = *ps++;
1553 m = *pm++;
1554 d = *pd;
1555
1556 *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
1557 w--;
1558 }
1559}
1560
1561static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1562core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
1563 uint32_t mask,
1564 uint32_t dst)
1565{
1566 __m64 d = unpack_32_1x64 (dst);
1567
1568 return pack_1x64_32 (
1569 over_1x64 (d, expand_alpha_1x64 (d),
1570 pix_multiply_1x64 (unpack_32_1x64 (src),
1571 unpack_32_1x64 (mask))));
1572}
1573
1574static force_inline__inline__ __attribute__ ((__always_inline__)) void
1575core_combine_over_reverse_ca_sse2 (uint32_t* pd,
1576 const uint32_t* ps,
1577 const uint32_t *pm,
1578 int w)
1579{
1580 uint32_t s, m, d;
1581
1582 __m128i xmm_alpha_lo, xmm_alpha_hi;
1583 __m128i xmm_src_lo, xmm_src_hi;
1584 __m128i xmm_dst_lo, xmm_dst_hi;
1585 __m128i xmm_mask_lo, xmm_mask_hi;
1586
1587 while (w && (unsigned long)pd & 15)
1588 {
1589 s = *ps++;
1590 m = *pm++;
1591 d = *pd;
1592
1593 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1594 w--;
1595 }
1596
1597 while (w >= 4)
1598 {
1599 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1600 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1601 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1602
1603 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1604 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1605 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1606
1607 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1608 &xmm_alpha_lo, &xmm_alpha_hi);
1609 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1610 &xmm_mask_lo, &xmm_mask_hi,
1611 &xmm_mask_lo, &xmm_mask_hi);
1612
1613 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1614 &xmm_alpha_lo, &xmm_alpha_hi,
1615 &xmm_mask_lo, &xmm_mask_hi);
1616
1617 save_128_aligned (
1618 (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
1619
1620 ps += 4;
1621 pd += 4;
1622 pm += 4;
1623 w -= 4;
1624 }
1625
1626 while (w)
1627 {
1628 s = *ps++;
1629 m = *pm++;
1630 d = *pd;
1631
1632 *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
1633 w--;
1634 }
1635}
1636
1637static force_inline__inline__ __attribute__ ((__always_inline__)) void
1638core_combine_in_ca_sse2 (uint32_t * pd,
1639 const uint32_t *ps,
1640 const uint32_t *pm,
1641 int w)
1642{
1643 uint32_t s, m, d;
1644
1645 __m128i xmm_alpha_lo, xmm_alpha_hi;
1646 __m128i xmm_src_lo, xmm_src_hi;
1647 __m128i xmm_dst_lo, xmm_dst_hi;
1648 __m128i xmm_mask_lo, xmm_mask_hi;
1649
1650 while (w && (unsigned long)pd & 15)
1651 {
1652 s = *ps++;
1653 m = *pm++;
1654 d = *pd;
1655
1656 *pd++ = pack_1x64_32 (
1657 pix_multiply_1x64 (
1658 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
1659 expand_alpha_1x64 (unpack_32_1x64 (d))));
1660
1661 w--;
1662 }
1663
1664 while (w >= 4)
1665 {
1666 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1667 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1668 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1669
1670 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1671 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1672 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1673
1674 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1675 &xmm_alpha_lo, &xmm_alpha_hi);
1676
1677 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1678 &xmm_mask_lo, &xmm_mask_hi,
1679 &xmm_dst_lo, &xmm_dst_hi);
1680
1681 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1682 &xmm_alpha_lo, &xmm_alpha_hi,
1683 &xmm_dst_lo, &xmm_dst_hi);
1684
1685 save_128_aligned (
1686 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1687
1688 ps += 4;
1689 pd += 4;
1690 pm += 4;
1691 w -= 4;
1692 }
1693
1694 while (w)
1695 {
1696 s = *ps++;
1697 m = *pm++;
1698 d = *pd;
1699
1700 *pd++ = pack_1x64_32 (
1701 pix_multiply_1x64 (
1702 pix_multiply_1x64 (
1703 unpack_32_1x64 (s), unpack_32_1x64 (m)),
1704 expand_alpha_1x64 (unpack_32_1x64 (d))));
1705
1706 w--;
1707 }
1708}
1709
1710static force_inline__inline__ __attribute__ ((__always_inline__)) void
1711core_combine_in_reverse_ca_sse2 (uint32_t * pd,
1712 const uint32_t *ps,
1713 const uint32_t *pm,
1714 int w)
1715{
1716 uint32_t s, m, d;
1717
1718 __m128i xmm_alpha_lo, xmm_alpha_hi;
1719 __m128i xmm_src_lo, xmm_src_hi;
1720 __m128i xmm_dst_lo, xmm_dst_hi;
1721 __m128i xmm_mask_lo, xmm_mask_hi;
1722
1723 while (w && (unsigned long)pd & 15)
1724 {
1725 s = *ps++;
1726 m = *pm++;
1727 d = *pd;
1728
1729 *pd++ = pack_1x64_32 (
1730 pix_multiply_1x64 (
1731 unpack_32_1x64 (d),
1732 pix_multiply_1x64 (unpack_32_1x64 (m),
1733 expand_alpha_1x64 (unpack_32_1x64 (s)))));
1734 w--;
1735 }
1736
1737 while (w >= 4)
1738 {
1739 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1740 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1741 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1742
1743 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1744 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1745 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1746
1747 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1748 &xmm_alpha_lo, &xmm_alpha_hi);
1749 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1750 &xmm_alpha_lo, &xmm_alpha_hi,
1751 &xmm_alpha_lo, &xmm_alpha_hi);
1752
1753 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1754 &xmm_alpha_lo, &xmm_alpha_hi,
1755 &xmm_dst_lo, &xmm_dst_hi);
1756
1757 save_128_aligned (
1758 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1759
1760 ps += 4;
1761 pd += 4;
1762 pm += 4;
1763 w -= 4;
1764 }
1765
1766 while (w)
1767 {
1768 s = *ps++;
1769 m = *pm++;
1770 d = *pd;
1771
1772 *pd++ = pack_1x64_32 (
1773 pix_multiply_1x64 (
1774 unpack_32_1x64 (d),
1775 pix_multiply_1x64 (unpack_32_1x64 (m),
1776 expand_alpha_1x64 (unpack_32_1x64 (s)))));
1777 w--;
1778 }
1779}
1780
1781static force_inline__inline__ __attribute__ ((__always_inline__)) void
1782core_combine_out_ca_sse2 (uint32_t * pd,
1783 const uint32_t *ps,
1784 const uint32_t *pm,
1785 int w)
1786{
1787 uint32_t s, m, d;
1788
1789 __m128i xmm_alpha_lo, xmm_alpha_hi;
1790 __m128i xmm_src_lo, xmm_src_hi;
1791 __m128i xmm_dst_lo, xmm_dst_hi;
1792 __m128i xmm_mask_lo, xmm_mask_hi;
1793
1794 while (w && (unsigned long)pd & 15)
1795 {
1796 s = *ps++;
1797 m = *pm++;
1798 d = *pd;
1799
1800 *pd++ = pack_1x64_32 (
1801 pix_multiply_1x64 (
1802 pix_multiply_1x64 (
1803 unpack_32_1x64 (s), unpack_32_1x64 (m)),
1804 negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
1805 w--;
1806 }
1807
1808 while (w >= 4)
1809 {
1810 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1811 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1812 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1813
1814 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1815 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1816 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1817
1818 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1819 &xmm_alpha_lo, &xmm_alpha_hi);
1820 negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
1821 &xmm_alpha_lo, &xmm_alpha_hi);
1822
1823 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1824 &xmm_mask_lo, &xmm_mask_hi,
1825 &xmm_dst_lo, &xmm_dst_hi);
1826 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1827 &xmm_alpha_lo, &xmm_alpha_hi,
1828 &xmm_dst_lo, &xmm_dst_hi);
1829
1830 save_128_aligned (
1831 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1832
1833 ps += 4;
1834 pd += 4;
1835 pm += 4;
1836 w -= 4;
1837 }
1838
1839 while (w)
1840 {
1841 s = *ps++;
1842 m = *pm++;
1843 d = *pd;
1844
1845 *pd++ = pack_1x64_32 (
1846 pix_multiply_1x64 (
1847 pix_multiply_1x64 (
1848 unpack_32_1x64 (s), unpack_32_1x64 (m)),
1849 negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
1850
1851 w--;
1852 }
1853}
1854
1855static force_inline__inline__ __attribute__ ((__always_inline__)) void
1856core_combine_out_reverse_ca_sse2 (uint32_t * pd,
1857 const uint32_t *ps,
1858 const uint32_t *pm,
1859 int w)
1860{
1861 uint32_t s, m, d;
1862
1863 __m128i xmm_alpha_lo, xmm_alpha_hi;
1864 __m128i xmm_src_lo, xmm_src_hi;
1865 __m128i xmm_dst_lo, xmm_dst_hi;
1866 __m128i xmm_mask_lo, xmm_mask_hi;
1867
1868 while (w && (unsigned long)pd & 15)
1869 {
1870 s = *ps++;
1871 m = *pm++;
1872 d = *pd;
1873
1874 *pd++ = pack_1x64_32 (
1875 pix_multiply_1x64 (
1876 unpack_32_1x64 (d),
1877 negate_1x64 (pix_multiply_1x64 (
1878 unpack_32_1x64 (m),
1879 expand_alpha_1x64 (unpack_32_1x64 (s))))));
1880 w--;
1881 }
1882
1883 while (w >= 4)
1884 {
1885 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1886 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1887 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1888
1889 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1890 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1891 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1892
1893 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1894 &xmm_alpha_lo, &xmm_alpha_hi);
1895
1896 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1897 &xmm_alpha_lo, &xmm_alpha_hi,
1898 &xmm_mask_lo, &xmm_mask_hi);
1899
1900 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
1901 &xmm_mask_lo, &xmm_mask_hi);
1902
1903 pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
1904 &xmm_mask_lo, &xmm_mask_hi,
1905 &xmm_dst_lo, &xmm_dst_hi);
1906
1907 save_128_aligned (
1908 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
1909
1910 ps += 4;
1911 pd += 4;
1912 pm += 4;
1913 w -= 4;
1914 }
1915
1916 while (w)
1917 {
1918 s = *ps++;
1919 m = *pm++;
1920 d = *pd;
1921
1922 *pd++ = pack_1x64_32 (
1923 pix_multiply_1x64 (
1924 unpack_32_1x64 (d),
1925 negate_1x64 (pix_multiply_1x64 (
1926 unpack_32_1x64 (m),
1927 expand_alpha_1x64 (unpack_32_1x64 (s))))));
1928 w--;
1929 }
1930}
1931
1932static force_inline__inline__ __attribute__ ((__always_inline__)) uint32_t
1933core_combine_atop_ca_pixel_sse2 (uint32_t src,
1934 uint32_t mask,
1935 uint32_t dst)
1936{
1937 __m64 m = unpack_32_1x64 (mask);
1938 __m64 s = unpack_32_1x64 (src);
1939 __m64 d = unpack_32_1x64 (dst);
1940 __m64 sa = expand_alpha_1x64 (s);
1941 __m64 da = expand_alpha_1x64 (d);
1942
1943 s = pix_multiply_1x64 (s, m);
1944 m = negate_1x64 (pix_multiply_1x64 (m, sa));
1945
1946 return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
1947}
1948
1949static force_inline void
1950core_combine_atop_ca_sse2 (uint32_t * pd,
1951 const uint32_t *ps,
1952 const uint32_t *pm,
1953 int w)
1954{
1955 uint32_t s, m, d;
1956
1957 __m128i xmm_src_lo, xmm_src_hi;
1958 __m128i xmm_dst_lo, xmm_dst_hi;
1959 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
1960 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
1961 __m128i xmm_mask_lo, xmm_mask_hi;
1962
1963 while (w && (unsigned long)pd & 15)
1964 {
1965 s = *ps++;
1966 m = *pm++;
1967 d = *pd;
1968
1969 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
1970 w--;
1971 }
1972
1973 while (w >= 4)
1974 {
1975 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
1976 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
1977 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
1978
1979 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
1980 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
1981 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1982
1983 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
1984 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
1985 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
1986 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
1987
1988 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
1989 &xmm_mask_lo, &xmm_mask_hi,
1990 &xmm_src_lo, &xmm_src_hi);
1991 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
1992 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
1993 &xmm_mask_lo, &xmm_mask_hi);
1994
1995 negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
1996
1997 pix_add_multiply_2x128 (
1998 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
1999 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2000 &xmm_dst_lo, &xmm_dst_hi);
2001
2002 save_128_aligned (
2003 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2004
2005 ps += 4;
2006 pd += 4;
2007 pm += 4;
2008 w -= 4;
2009 }
2010
2011 while (w)
2012 {
2013 s = *ps++;
2014 m = *pm++;
2015 d = *pd;
2016
2017 *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
2018 w--;
2019 }
2020}
2021
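/*
 * Component-alpha ATOP_REVERSE: dest = dest * mask * alpha(src) + src * mask * (1 - alpha(dest)).
 * Same structure as the ATOP case above, except that the destination alpha is the
 * term being negated instead of the (mask * source-alpha) product.
 */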
2022static force_inline uint32_t
2023core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
2024 uint32_t mask,
2025 uint32_t dst)
2026{
2027 __m64 m = unpack_32_1x64 (mask);
2028 __m64 s = unpack_32_1x64 (src);
2029 __m64 d = unpack_32_1x64 (dst);
2030
2031 __m64 da = negate_1x64 (expand_alpha_1x64 (d));
2032 __m64 sa = expand_alpha_1x64 (s);
2033
2034 s = pix_multiply_1x64 (s, m);
2035 m = pix_multiply_1x64 (m, sa);
2036
2037 return pack_1x64_32 (pix_add_multiply_1x64 (&d, &m, &s, &da));
2038}
2039
2040static force_inline void
2041core_combine_reverse_atop_ca_sse2 (uint32_t * pd,
2042 const uint32_t *ps,
2043 const uint32_t *pm,
2044 int w)
2045{
2046 uint32_t s, m, d;
2047
2048 __m128i xmm_src_lo, xmm_src_hi;
2049 __m128i xmm_dst_lo, xmm_dst_hi;
2050 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2051 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2052 __m128i xmm_mask_lo, xmm_mask_hi;
2053
2054 while (w && (unsigned long)pd & 15)
2055 {
2056 s = *ps++;
2057 m = *pm++;
2058 d = *pd;
2059
2060 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2061 w--;
2062 }
2063
2064 while (w >= 4)
2065 {
2066 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2067 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2068 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2069
2070 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2071 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2072 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2073
2074 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2075 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2076 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2077 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2078
2079 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2080 &xmm_mask_lo, &xmm_mask_hi,
2081 &xmm_src_lo, &xmm_src_hi);
2082 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2083 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2084 &xmm_mask_lo, &xmm_mask_hi);
2085
2086 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2087 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2088
2089 pix_add_multiply_2x128 (
2090 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2091 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2092 &xmm_dst_lo, &xmm_dst_hi);
2093
2094 save_128_aligned (
2095 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2096
2097 ps += 4;
2098 pd += 4;
2099 pm += 4;
2100 w -= 4;
2101 }
2102
2103 while (w)
2104 {
2105 s = *ps++;
2106 m = *pm++;
2107 d = *pd;
2108
2109 *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
2110 w--;
2111 }
2112}
2113
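/*
 * Component-alpha XOR: dest = src * mask * (1 - alpha(dest)) + dest * (1 - mask * alpha(src)).
 * Both the destination alpha and the mask * source-alpha product are negated
 * before the final add/multiply step.
 */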
2114static force_inline uint32_t
2115core_combine_xor_ca_pixel_sse2 (uint32_t src,
2116 uint32_t mask,
2117 uint32_t dst)
2118{
2119 __m64 a = unpack_32_1x64 (mask);
2120 __m64 s = unpack_32_1x64 (src);
2121 __m64 d = unpack_32_1x64 (dst);
2122
2123 __m64 alpha_dst = negate_1x64 (pix_multiply_1x64 (
2124 a, expand_alpha_1x64 (s)));
2125 __m64 dest = pix_multiply_1x64 (s, a);
2126 __m64 alpha_src = negate_1x64 (expand_alpha_1x64 (d));
2127
2128 return pack_1x64_32 (pix_add_multiply_1x64 (&d,
2129 &alpha_dst,
2130 &dest,
2131 &alpha_src));
2132}
2133
2134static force_inline void
2135core_combine_xor_ca_sse2 (uint32_t * pd,
2136 const uint32_t *ps,
2137 const uint32_t *pm,
2138 int w)
2139{
2140 uint32_t s, m, d;
2141
2142 __m128i xmm_src_lo, xmm_src_hi;
2143 __m128i xmm_dst_lo, xmm_dst_hi;
2144 __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
2145 __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
2146 __m128i xmm_mask_lo, xmm_mask_hi;
2147
2148 while (w && (unsigned long)pd & 15)
2149 {
2150 s = *ps++;
2151 m = *pm++;
2152 d = *pd;
2153
2154 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2155 w--;
2156 }
2157
2158 while (w >= 4)
2159 {
2160 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2161 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2162 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2163
2164 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2165 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2166 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2167
2168 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
2169 &xmm_alpha_src_lo, &xmm_alpha_src_hi);
2170 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
2171 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2172
2173 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2174 &xmm_mask_lo, &xmm_mask_hi,
2175 &xmm_src_lo, &xmm_src_hi);
2176 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
2177 &xmm_alpha_src_lo, &xmm_alpha_src_hi,
2178 &xmm_mask_lo, &xmm_mask_hi);
2179
2180 negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
2181 &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
2182 negate_2x128 (xmm_mask_lo, xmm_mask_hi,
2183 &xmm_mask_lo, &xmm_mask_hi);
2184
2185 pix_add_multiply_2x128 (
2186 &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
2187 &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
2188 &xmm_dst_lo, &xmm_dst_hi);
2189
2190 save_128_aligned (
2191 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2192
2193 ps += 4;
2194 pd += 4;
2195 pm += 4;
2196 w -= 4;
2197 }
2198
2199 while (w)
2200 {
2201 s = *ps++;
2202 m = *pm++;
2203 d = *pd;
2204
2205 *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
2206 w--;
2207 }
2208}
2209
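/*
 * Component-alpha ADD: dest = clamp (src * mask + dest), using saturating byte
 * adds (_mm_adds_pu8 for single pixels, _mm_adds_epu8 in the 4-pixel body).
 */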
2210static force_inline void
2211core_combine_add_ca_sse2 (uint32_t * pd,
2212 const uint32_t *ps,
2213 const uint32_t *pm,
2214 int w)
2215{
2216 uint32_t s, m, d;
2217
2218 __m128i xmm_src_lo, xmm_src_hi;
2219 __m128i xmm_dst_lo, xmm_dst_hi;
2220 __m128i xmm_mask_lo, xmm_mask_hi;
2221
2222 while (w && (unsigned long)pd & 15)
2223 {
2224 s = *ps++;
2225 m = *pm++;
2226 d = *pd;
2227
2228 *pd++ = pack_1x64_32 (
2229 _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
2230 unpack_32_1x64 (m)),
2231 unpack_32_1x64 (d)));
2232 w--;
2233 }
2234
2235 while (w >= 4)
2236 {
2237 xmm_src_hi = load_128_unaligned ((__m128i*)ps);
2238 xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
2239 xmm_dst_hi = load_128_aligned ((__m128i*)pd);
2240
2241 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
2242 unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
2243 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
2244
2245 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
2246 &xmm_mask_lo, &xmm_mask_hi,
2247 &xmm_src_lo, &xmm_src_hi);
2248
2249 save_128_aligned (
2250 (__m128i*)pd, pack_2x128_128 (
2251 _mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
2252 _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
2253
2254 ps += 4;
2255 pd += 4;
2256 pm += 4;
2257 w -= 4;
2258 }
2259
2260 while (w)
2261 {
2262 s = *ps++;
2263 m = *pm++;
2264 d = *pd;
2265
2266 *pd++ = pack_1x64_32 (
2267 _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
2268 unpack_32_1x64 (m)),
2269 unpack_32_1x64 (d)));
2270 w--;
2271 }
2272}
2273
2274/* ---------------------------------------------------
2275 * fb_compose_setup_SSE2
2276 */
2277static force_inline __m64
2278create_mask_16_64 (uint16_t mask)
2279{
2280 return _mm_set1_pi16 (mask);
2281}
2282
2283static force_inline __m128i
2284create_mask_16_128 (uint16_t mask)
2285{
2286 return _mm_set1_epi16 (mask);
2287}
2288
2289static force_inline __m64
2290create_mask_2x32_64 (uint32_t mask0,
2291 uint32_t mask1)
2292{
2293 return _mm_set_pi32 (mask0, mask1);
2294}
2295
2296/* Work around a code generation bug in Sun Studio 12. */
2297#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
2298# define create_mask_2x32_128(mask0, mask1) \
2299 (_mm_set_epi32 ((mask0), (mask1), (mask0), (mask1)))
2300#else
2301static force_inline __m128i
2302create_mask_2x32_128 (uint32_t mask0,
2303 uint32_t mask1)
2304{
2305 return _mm_set_epi32 (mask0, mask1, mask0, mask1);
2306}
2307#endif
2308
2309/* SSE2 code patch for fbcompose.c */
2310
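/*
 * The sse2_combine_* functions below are thin wrappers around the
 * core_combine_*_sse2 routines; each forwards its arguments and then calls
 * _mm_empty () so no MMX state is left live when returning to generic code.
 */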
2311static void
2312sse2_combine_over_u (pixman_implementation_t *imp,
2313 pixman_op_t op,
2314 uint32_t * dst,
2315 const uint32_t * src,
2316 const uint32_t * mask,
2317 int width)
2318{
2319 core_combine_over_u_sse2 (dst, src, mask, width);
2320 _mm_empty ();
2321}
2322
2323static void
2324sse2_combine_over_reverse_u (pixman_implementation_t *imp,
2325 pixman_op_t op,
2326 uint32_t * dst,
2327 const uint32_t * src,
2328 const uint32_t * mask,
2329 int width)
2330{
2331 core_combine_over_reverse_u_sse2 (dst, src, mask, width);
2332 _mm_empty ();
2333}
2334
2335static void
2336sse2_combine_in_u (pixman_implementation_t *imp,
2337 pixman_op_t op,
2338 uint32_t * dst,
2339 const uint32_t * src,
2340 const uint32_t * mask,
2341 int width)
2342{
2343 core_combine_in_u_sse2 (dst, src, mask, width);
2344 _mm_empty ();
2345}
2346
2347static void
2348sse2_combine_in_reverse_u (pixman_implementation_t *imp,
2349 pixman_op_t op,
2350 uint32_t * dst,
2351 const uint32_t * src,
2352 const uint32_t * mask,
2353 int width)
2354{
2355 core_combine_reverse_in_u_sse2 (dst, src, mask, width);
2356 _mm_empty ();
2357}
2358
2359static void
2360sse2_combine_out_u (pixman_implementation_t *imp,
2361 pixman_op_t op,
2362 uint32_t * dst,
2363 const uint32_t * src,
2364 const uint32_t * mask,
2365 int width)
2366{
2367 core_combine_out_u_sse2 (dst, src, mask, width);
2368 _mm_empty ();
2369}
2370
2371static void
2372sse2_combine_out_reverse_u (pixman_implementation_t *imp,
2373 pixman_op_t op,
2374 uint32_t * dst,
2375 const uint32_t * src,
2376 const uint32_t * mask,
2377 int width)
2378{
2379 core_combine_reverse_out_u_sse2 (dst, src, mask, width);
2380 _mm_empty ();
2381}
2382
2383static void
2384sse2_combine_atop_u (pixman_implementation_t *imp,
2385 pixman_op_t op,
2386 uint32_t * dst,
2387 const uint32_t * src,
2388 const uint32_t * mask,
2389 int width)
2390{
2391 core_combine_atop_u_sse2 (dst, src, mask, width);
2392 _mm_empty ();
2393}
2394
2395static void
2396sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
2397 pixman_op_t op,
2398 uint32_t * dst,
2399 const uint32_t * src,
2400 const uint32_t * mask,
2401 int width)
2402{
2403 core_combine_reverse_atop_u_sse2 (dst, src, mask, width);
2404 _mm_empty ();
2405}
2406
2407static void
2408sse2_combine_xor_u (pixman_implementation_t *imp,
2409 pixman_op_t op,
2410 uint32_t * dst,
2411 const uint32_t * src,
2412 const uint32_t * mask,
2413 int width)
2414{
2415 core_combine_xor_u_sse2 (dst, src, mask, width);
2416 _mm_empty ();
2417}
2418
2419static void
2420sse2_combine_add_u (pixman_implementation_t *imp,
2421 pixman_op_t op,
2422 uint32_t * dst,
2423 const uint32_t * src,
2424 const uint32_t * mask,
2425 int width)
2426{
2427 core_combine_add_u_sse2 (dst, src, mask, width);
2428 _mm_empty ();
2429}
2430
2431static void
2432sse2_combine_saturate_u (pixman_implementation_t *imp,
2433 pixman_op_t op,
2434 uint32_t * dst,
2435 const uint32_t * src,
2436 const uint32_t * mask,
2437 int width)
2438{
2439 core_combine_saturate_u_sse2 (dst, src, mask, width);
2440 _mm_empty ();
2441}
2442
2443static void
2444sse2_combine_src_ca (pixman_implementation_t *imp,
2445 pixman_op_t op,
2446 uint32_t * dst,
2447 const uint32_t * src,
2448 const uint32_t * mask,
2449 int width)
2450{
2451 core_combine_src_ca_sse2 (dst, src, mask, width);
2452 _mm_empty ();
2453}
2454
2455static void
2456sse2_combine_over_ca (pixman_implementation_t *imp,
2457 pixman_op_t op,
2458 uint32_t * dst,
2459 const uint32_t * src,
2460 const uint32_t * mask,
2461 int width)
2462{
2463 core_combine_over_ca_sse2 (dst, src, mask, width);
2464 _mm_empty ();
2465}
2466
2467static void
2468sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
2469 pixman_op_t op,
2470 uint32_t * dst,
2471 const uint32_t * src,
2472 const uint32_t * mask,
2473 int width)
2474{
2475 core_combine_over_reverse_ca_sse2 (dst, src, mask, width);
2476 _mm_empty ();
2477}
2478
2479static void
2480sse2_combine_in_ca (pixman_implementation_t *imp,
2481 pixman_op_t op,
2482 uint32_t * dst,
2483 const uint32_t * src,
2484 const uint32_t * mask,
2485 int width)
2486{
2487 core_combine_in_ca_sse2 (dst, src, mask, width);
2488 _mm_empty ();
2489}
2490
2491static void
2492sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
2493 pixman_op_t op,
2494 uint32_t * dst,
2495 const uint32_t * src,
2496 const uint32_t * mask,
2497 int width)
2498{
2499 core_combine_in_reverse_ca_sse2 (dst, src, mask, width);
2500 _mm_empty ();
2501}
2502
2503static void
2504sse2_combine_out_ca (pixman_implementation_t *imp,
2505 pixman_op_t op,
2506 uint32_t * dst,
2507 const uint32_t * src,
2508 const uint32_t * mask,
2509 int width)
2510{
2511 core_combine_out_ca_sse2 (dst, src, mask, width);
2512 _mm_empty ();
2513}
2514
2515static void
2516sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
2517 pixman_op_t op,
2518 uint32_t * dst,
2519 const uint32_t * src,
2520 const uint32_t * mask,
2521 int width)
2522{
2523 core_combine_out_reverse_ca_sse2 (dst, src, mask, width);
2524 _mm_empty ();
2525}
2526
2527static void
2528sse2_combine_atop_ca (pixman_implementation_t *imp,
2529 pixman_op_t op,
2530 uint32_t * dst,
2531 const uint32_t * src,
2532 const uint32_t * mask,
2533 int width)
2534{
2535 core_combine_atop_ca_sse2 (dst, src, mask, width);
2536 _mm_empty ();
2537}
2538
2539static void
2540sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
2541 pixman_op_t op,
2542 uint32_t * dst,
2543 const uint32_t * src,
2544 const uint32_t * mask,
2545 int width)
2546{
2547 core_combine_reverse_atop_ca_sse2 (dst, src, mask, width);
2548 _mm_empty ();
2549}
2550
2551static void
2552sse2_combine_xor_ca (pixman_implementation_t *imp,
2553 pixman_op_t op,
2554 uint32_t * dst,
2555 const uint32_t * src,
2556 const uint32_t * mask,
2557 int width)
2558{
2559 core_combine_xor_ca_sse2 (dst, src, mask, width);
2560 _mm_empty ();
2561}
2562
2563static void
2564sse2_combine_add_ca (pixman_implementation_t *imp,
2565 pixman_op_t op,
2566 uint32_t * dst,
2567 const uint32_t * src,
2568 const uint32_t * mask,
2569 int width)
2570{
2571 core_combine_add_ca_sse2 (dst, src, mask, width);
2572 _mm_empty ();
2573}
2574
2575/* -------------------------------------------------------------------
2576 * composite_over_n_8888
2577 */
2578
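/*
 * Solid (n) source OVER an a8r8g8b8 destination.  The source pixel and its
 * expanded alpha are computed once; each row is then handled with an
 * alignment head loop, a 4-pixel over_2x128 body and a per-pixel tail.
 */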
2579static void
2580sse2_composite_over_n_8888 (pixman_implementation_t *imp,
2581 pixman_op_t op,
2582 pixman_image_t * src_image,
2583 pixman_image_t * mask_image,
2584 pixman_image_t * dst_image,
2585 int32_t src_x,
2586 int32_t src_y,
2587 int32_t mask_x,
2588 int32_t mask_y,
2589 int32_t dest_x,
2590 int32_t dest_y,
2591 int32_t width,
2592 int32_t height)
2593{
2594 uint32_t src;
2595 uint32_t *dst_line, *dst, d;
2596 int32_t w;
2597 int dst_stride;
2598 __m128i xmm_src, xmm_alpha;
2599 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2600
2601 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2602
2603 if (src == 0)
2604 return;
2605
2606    PIXMAN_IMAGE_GET_LINE (
2607        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2608
2609 xmm_src = expand_pixel_32_1x128 (src);
2610 xmm_alpha = expand_alpha_1x128 (xmm_src);
2611
2612 while (height--)
2613 {
2614 dst = dst_line;
2615
2616 dst_line += dst_stride;
2617 w = width;
2618
2619 while (w && (unsigned long)dst & 15)
2620 {
2621 d = *dst;
2622 *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2623 _mm_movepi64_pi64 (xmm_alpha),
2624 unpack_32_1x64 (d)));
2625 w--;
2626 }
2627
2628 while (w >= 4)
2629 {
2630 xmm_dst = load_128_aligned ((__m128i*)dst);
2631
2632 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2633
2634 over_2x128 (&xmm_src, &xmm_src,
2635 &xmm_alpha, &xmm_alpha,
2636 &xmm_dst_lo, &xmm_dst_hi);
2637
2638            /* rebuild the 4 pixel data and save */
2639 save_128_aligned (
2640 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2641
2642 w -= 4;
2643 dst += 4;
2644 }
2645
2646 while (w)
2647 {
2648 d = *dst;
2649 *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2650 _mm_movepi64_pi64 (xmm_alpha),
2651 unpack_32_1x64 (d)));
2652 w--;
2653 }
2654
2655 }
2656 _mm_empty ();
2657}
2658
2659/* ---------------------------------------------------------------------
2660 * composite_over_n_0565
2661 */
2662static void
2663sse2_composite_over_n_0565 (pixman_implementation_t *imp,
2664 pixman_op_t op,
2665 pixman_image_t * src_image,
2666 pixman_image_t * mask_image,
2667 pixman_image_t * dst_image,
2668 int32_t src_x,
2669 int32_t src_y,
2670 int32_t mask_x,
2671 int32_t mask_y,
2672 int32_t dest_x,
2673 int32_t dest_y,
2674 int32_t width,
2675 int32_t height)
2676{
2677 uint32_t src;
2678 uint16_t *dst_line, *dst, d;
2679 int32_t w;
2680 int dst_stride;
2681 __m128i xmm_src, xmm_alpha;
2682 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
2683
2684 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2685
2686 if (src == 0)
2687 return;
2688
2689    PIXMAN_IMAGE_GET_LINE (
2690        dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
2691
2692 xmm_src = expand_pixel_32_1x128 (src);
2693 xmm_alpha = expand_alpha_1x128 (xmm_src);
2694
2695 while (height--)
2696 {
2697 dst = dst_line;
2698
2699 dst_line += dst_stride;
2700 w = width;
2701
2702 while (w && (unsigned long)dst & 15)
2703 {
2704 d = *dst;
2705
2706 *dst++ = pack_565_32_16 (
2707 pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2708 _mm_movepi64_pi64 (xmm_alpha),
2709 expand565_16_1x64 (d))));
2710 w--;
2711 }
2712
2713 while (w >= 8)
2714 {
2715 xmm_dst = load_128_aligned ((__m128i*)dst);
2716
2717 unpack_565_128_4x128 (xmm_dst,
2718 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2719
2720 over_2x128 (&xmm_src, &xmm_src,
2721 &xmm_alpha, &xmm_alpha,
2722 &xmm_dst0, &xmm_dst1);
2723 over_2x128 (&xmm_src, &xmm_src,
2724 &xmm_alpha, &xmm_alpha,
2725 &xmm_dst2, &xmm_dst3);
2726
2727 xmm_dst = pack_565_4x128_128 (
2728 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
2729
2730 save_128_aligned ((__m128i*)dst, xmm_dst);
2731
2732 dst += 8;
2733 w -= 8;
2734 }
2735
2736 while (w--)
2737 {
2738 d = *dst;
2739 *dst++ = pack_565_32_16 (
2740 pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
2741 _mm_movepi64_pi64 (xmm_alpha),
2742 expand565_16_1x64 (d))));
2743 }
2744 }
2745
2746 _mm_empty ();
2747}
2748
2749/* ------------------------------
2750 * composite_add_n_8888_8888_ca
2751 */
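/*
 * Solid source with a per-component a8r8g8b8 mask, ADD operator:
 * dest = clamp (src * mask + dest).  The 4-pixel body skips blocks whose
 * mask is entirely zero, detected with _mm_cmpeq_epi32 / _mm_movemask_epi8.
 */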
2752static void
2753sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
2754 pixman_op_t op,
2755 pixman_image_t * src_image,
2756 pixman_image_t * mask_image,
2757 pixman_image_t * dst_image,
2758 int32_t src_x,
2759 int32_t src_y,
2760 int32_t mask_x,
2761 int32_t mask_y,
2762 int32_t dest_x,
2763 int32_t dest_y,
2764 int32_t width,
2765 int32_t height)
2766{
2767 uint32_t src, srca;
2768 uint32_t *dst_line, d;
2769 uint32_t *mask_line, m;
2770 uint32_t pack_cmp;
2771 int dst_stride, mask_stride;
2772
2773 __m128i xmm_src, xmm_alpha;
2774 __m128i xmm_dst;
2775 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2776
2777 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
2778
2779 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2780 srca = src >> 24;
2781
2782 if (src == 0)
2783 return;
2784
2785    PIXMAN_IMAGE_GET_LINE (
2786        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2787    PIXMAN_IMAGE_GET_LINE (
2788        mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2789
2790 xmm_src = _mm_unpacklo_epi8 (
2791 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2792 xmm_alpha = expand_alpha_1x128 (xmm_src);
2793 mmx_src = _mm_movepi64_pi64 (xmm_src);
2794 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
2795
2796 while (height--)
2797 {
2798 int w = width;
2799 const uint32_t *pm = (uint32_t *)mask_line;
2800 uint32_t *pd = (uint32_t *)dst_line;
2801
2802 dst_line += dst_stride;
2803 mask_line += mask_stride;
2804
2805 while (w && (unsigned long)pd & 15)
2806 {
2807 m = *pm++;
2808
2809 if (m)
2810 {
2811 d = *pd;
2812
2813 mmx_mask = unpack_32_1x64 (m);
2814 mmx_dest = unpack_32_1x64 (d);
2815
2816 *pd = pack_1x64_32 (
2817 _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
2818 }
2819
2820 pd++;
2821 w--;
2822 }
2823
2824 while (w >= 4)
2825 {
2826 xmm_mask = load_128_unaligned ((__m128i*)pm);
2827
2828 pack_cmp =
2829 _mm_movemask_epi8 (
2830 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2831
2832            /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
2833 if (pack_cmp != 0xffff)
2834 {
2835 xmm_dst = load_128_aligned ((__m128i*)pd);
2836
2837 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2838
2839 pix_multiply_2x128 (&xmm_src, &xmm_src,
2840 &xmm_mask_lo, &xmm_mask_hi,
2841 &xmm_mask_lo, &xmm_mask_hi);
2842 xmm_mask_hi = pack_2x128_128 (xmm_mask_lo, xmm_mask_hi);
2843
2844 save_128_aligned (
2845 (__m128i*)pd, _mm_adds_epu8 (xmm_mask_hi, xmm_dst));
2846 }
2847
2848 pd += 4;
2849 pm += 4;
2850 w -= 4;
2851 }
2852
2853 while (w)
2854 {
2855 m = *pm++;
2856
2857 if (m)
2858 {
2859 d = *pd;
2860
2861 mmx_mask = unpack_32_1x64 (m);
2862 mmx_dest = unpack_32_1x64 (d);
2863
2864 *pd = pack_1x64_32 (
2865 _mm_adds_pu8 (pix_multiply_1x64 (mmx_mask, mmx_src), mmx_dest));
2866 }
2867
2868 pd++;
2869 w--;
2870 }
2871 }
2872
2873 _mm_empty ();
2874}
2875
2876/* ---------------------------------------------------------------------------
2877 * composite_over_n_8888_8888_ca
2878 */
2879
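/*
 * Solid source with a per-component a8r8g8b8 mask, OVER operator:
 * dest = (src IN mask) OVER dest.  All-zero mask blocks are skipped using
 * the same movemask test as the ADD variant above.
 */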
2880static void
2881sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
2882 pixman_op_t op,
2883 pixman_image_t * src_image,
2884 pixman_image_t * mask_image,
2885 pixman_image_t * dst_image,
2886 int32_t src_x,
2887 int32_t src_y,
2888 int32_t mask_x,
2889 int32_t mask_y,
2890 int32_t dest_x,
2891 int32_t dest_y,
2892 int32_t width,
2893 int32_t height)
2894{
2895 uint32_t src;
2896 uint32_t *dst_line, d;
2897 uint32_t *mask_line, m;
2898 uint32_t pack_cmp;
2899 int dst_stride, mask_stride;
2900
2901 __m128i xmm_src, xmm_alpha;
2902 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
2903 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
2904
2905 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
2906
2907 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
2908
2909 if (src == 0)
2910 return;
2911
2912    PIXMAN_IMAGE_GET_LINE (
2913        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
2914    PIXMAN_IMAGE_GET_LINE (
2915        mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
2916
2917 xmm_src = _mm_unpacklo_epi8 (
2918 create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
2919 xmm_alpha = expand_alpha_1x128 (xmm_src);
2920 mmx_src = _mm_movepi64_pi64 (xmm_src);
2921 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
2922
2923 while (height--)
2924 {
2925 int w = width;
2926 const uint32_t *pm = (uint32_t *)mask_line;
2927 uint32_t *pd = (uint32_t *)dst_line;
2928
2929 dst_line += dst_stride;
2930 mask_line += mask_stride;
2931
2932 while (w && (unsigned long)pd & 15)
2933 {
2934 m = *pm++;
2935
2936 if (m)
2937 {
2938 d = *pd;
2939 mmx_mask = unpack_32_1x64 (m);
2940 mmx_dest = unpack_32_1x64 (d);
2941
2942 *pd = pack_1x64_32 (in_over_1x64 (&mmx_src,
2943 &mmx_alpha,
2944 &mmx_mask,
2945 &mmx_dest));
2946 }
2947
2948 pd++;
2949 w--;
2950 }
2951
2952 while (w >= 4)
2953 {
2954 xmm_mask = load_128_unaligned ((__m128i*)pm);
2955
2956 pack_cmp =
2957 _mm_movemask_epi8 (
2958 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
2959
2960            /* if all bits in mask are zero, pack_cmp is equal to 0xffff */
2961 if (pack_cmp != 0xffff)
2962 {
2963 xmm_dst = load_128_aligned ((__m128i*)pd);
2964
2965 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
2966 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
2967
2968 in_over_2x128 (&xmm_src, &xmm_src,
2969 &xmm_alpha, &xmm_alpha,
2970 &xmm_mask_lo, &xmm_mask_hi,
2971 &xmm_dst_lo, &xmm_dst_hi);
2972
2973 save_128_aligned (
2974 (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
2975 }
2976
2977 pd += 4;
2978 pm += 4;
2979 w -= 4;
2980 }
2981
2982 while (w)
2983 {
2984 m = *pm++;
2985
2986 if (m)
2987 {
2988 d = *pd;
2989 mmx_mask = unpack_32_1x64 (m);
2990 mmx_dest = unpack_32_1x64 (d);
2991
2992 *pd = pack_1x64_32 (
2993 in_over_1x64 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
2994 }
2995
2996 pd++;
2997 w--;
2998 }
2999 }
3000
3001 _mm_empty ();
3002}
3003
3004/*---------------------------------------------------------------------
3005 * composite_over_8888_n_8888
3006 */
3007
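/*
 * a8r8g8b8 source with a solid mask (only its alpha byte is used), OVER an
 * a8r8g8b8 destination.  The mask alpha is broadcast into xmm_mask once, and
 * all-zero source blocks are skipped via is_zero ().
 */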
3008static void
3009sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
3010 pixman_op_t op,
3011 pixman_image_t * src_image,
3012 pixman_image_t * mask_image,
3013 pixman_image_t * dst_image,
3014 int32_t src_x,
3015 int32_t src_y,
3016 int32_t mask_x,
3017 int32_t mask_y,
3018 int32_t dest_x,
3019 int32_t dest_y,
3020 int32_t width,
3021 int32_t height)
3022{
3023 uint32_t *dst_line, *dst;
3024 uint32_t *src_line, *src;
3025 uint32_t mask;
3026 int32_t w;
3027 int dst_stride, src_stride;
3028
3029 __m128i xmm_mask;
3030 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3031 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3032 __m128i xmm_alpha_lo, xmm_alpha_hi;
3033
3034    PIXMAN_IMAGE_GET_LINE (
3035        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3036    PIXMAN_IMAGE_GET_LINE (
3037        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3038
3039 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
3040
3041 xmm_mask = create_mask_16_128 (mask >> 24);
3042
3043 while (height--)
3044 {
3045 dst = dst_line;
3046 dst_line += dst_stride;
3047 src = src_line;
3048 src_line += src_stride;
3049 w = width;
3050
3051 while (w && (unsigned long)dst & 15)
3052 {
3053 uint32_t s = *src++;
3054
3055 if (s)
3056 {
3057 uint32_t d = *dst;
3058
3059 __m64 ms = unpack_32_1x64 (s);
3060 __m64 alpha = expand_alpha_1x64 (ms);
3061 __m64 dest = _mm_movepi64_pi64 (xmm_mask);
3062 __m64 alpha_dst = unpack_32_1x64 (d);
3063
3064 *dst = pack_1x64_32 (
3065 in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
3066 }
3067 dst++;
3068 w--;
3069 }
3070
3071 while (w >= 4)
3072 {
3073 xmm_src = load_128_unaligned ((__m128i*)src);
3074
3075 if (!is_zero (xmm_src))
3076 {
3077 xmm_dst = load_128_aligned ((__m128i*)dst);
3078
3079 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3080 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3081 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3082 &xmm_alpha_lo, &xmm_alpha_hi);
3083
3084 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
3085 &xmm_alpha_lo, &xmm_alpha_hi,
3086 &xmm_mask, &xmm_mask,
3087 &xmm_dst_lo, &xmm_dst_hi);
3088
3089 save_128_aligned (
3090 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3091 }
3092
3093 dst += 4;
3094 src += 4;
3095 w -= 4;
3096 }
3097
3098 while (w)
3099 {
3100 uint32_t s = *src++;
3101
3102 if (s)
3103 {
3104 uint32_t d = *dst;
3105
3106 __m64 ms = unpack_32_1x64 (s);
3107 __m64 alpha = expand_alpha_1x64 (ms);
3108 __m64 mask = _mm_movepi64_pi64 (xmm_mask);
3109 __m64 dest = unpack_32_1x64 (d);
3110
3111 *dst = pack_1x64_32 (
3112 in_over_1x64 (&ms, &alpha, &mask, &dest));
3113 }
3114
3115 dst++;
3116 w--;
3117 }
3118 }
3119
3120 _mm_empty ();
3121}
3122
3123/*---------------------------------------------------------------------
3124 * composite_src_x888_8888
3125 */
3126
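/*
 * SRC from x8r8g8b8 to a8r8g8b8: pixels are copied with the alpha byte forced
 * to 0xff (OR with mask_ff000000); the main loop handles 16 pixels per
 * iteration using four 128-bit loads and stores.
 */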
3127static void
3128sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
3129 pixman_op_t op,
3130 pixman_image_t * src_image,
3131 pixman_image_t * mask_image,
3132 pixman_image_t * dst_image,
3133 int32_t src_x,
3134 int32_t src_y,
3135 int32_t mask_x,
3136 int32_t mask_y,
3137 int32_t dest_x,
3138 int32_t dest_y,
3139 int32_t width,
3140 int32_t height)
3141{
3142 uint32_t *dst_line, *dst;
3143 uint32_t *src_line, *src;
3144 int32_t w;
3145 int dst_stride, src_stride;
3146
3147
3148    PIXMAN_IMAGE_GET_LINE (
3149        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3150    PIXMAN_IMAGE_GET_LINE (
3151        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3152
3153 while (height--)
3154 {
3155 dst = dst_line;
3156 dst_line += dst_stride;
3157 src = src_line;
3158 src_line += src_stride;
3159 w = width;
3160
3161 while (w && (unsigned long)dst & 15)
3162 {
3163 *dst++ = *src++ | 0xff000000;
3164 w--;
3165 }
3166
3167 while (w >= 16)
3168 {
3169 __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
3170
3171 xmm_src1 = load_128_unaligned ((__m128i*)src + 0);
3172 xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
3173 xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
3174 xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
3175
3176 save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000));
3177 save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
3178 save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
3179 save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
3180
3181 dst += 16;
3182 src += 16;
3183 w -= 16;
3184 }
3185
3186 while (w)
3187 {
3188 *dst++ = *src++ | 0xff000000;
3189 w--;
3190 }
3191 }
3192
3193 _mm_empty ();
3194}
3195
3196/* ---------------------------------------------------------------------
3197 * composite_over_x888_n_8888
3198 */
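/*
 * x8r8g8b8 source (treated as opaque), solid mask alpha, OVER an a8r8g8b8
 * destination.  Because the source alpha is forced to 0xff, xmm_alpha is just
 * the constant mask_00ff rather than a per-pixel expansion.
 */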
3199static void
3200sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
3201 pixman_op_t op,
3202 pixman_image_t * src_image,
3203 pixman_image_t * mask_image,
3204 pixman_image_t * dst_image,
3205 int32_t src_x,
3206 int32_t src_y,
3207 int32_t mask_x,
3208 int32_t mask_y,
3209 int32_t dest_x,
3210 int32_t dest_y,
3211 int32_t width,
3212 int32_t height)
3213{
3214 uint32_t *dst_line, *dst;
3215 uint32_t *src_line, *src;
3216 uint32_t mask;
3217 int dst_stride, src_stride;
3218 int32_t w;
3219
3220 __m128i xmm_mask, xmm_alpha;
3221 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3222 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3223
3224    PIXMAN_IMAGE_GET_LINE (
3225        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3226    PIXMAN_IMAGE_GET_LINE (
3227        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3228
3229 mask = _pixman_image_get_solid (imp, mask_image, PIXMAN_a8r8g8b8);
3230
3231 xmm_mask = create_mask_16_128 (mask >> 24);
3232 xmm_alpha = mask_00ff;
3233
3234 while (height--)
3235 {
3236 dst = dst_line;
3237 dst_line += dst_stride;
3238 src = src_line;
3239 src_line += src_stride;
3240 w = width;
3241
3242 while (w && (unsigned long)dst & 15)
3243 {
3244 uint32_t s = (*src++) | 0xff000000;
3245 uint32_t d = *dst;
3246
3247 __m64 src = unpack_32_1x64 (s);
3248 __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
3249 __m64 mask = _mm_movepi64_pi64 (xmm_mask);
3250 __m64 dest = unpack_32_1x64 (d);
3251
3252 *dst++ = pack_1x64_32 (
3253 in_over_1x64 (&src, &alpha, &mask, &dest));
3254
3255 w--;
3256 }
3257
3258 while (w >= 4)
3259 {
3260 xmm_src = _mm_or_si128 (
3261 load_128_unaligned ((__m128i*)src), mask_ff000000);
3262 xmm_dst = load_128_aligned ((__m128i*)dst);
3263
3264 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3265 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3266
3267 in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
3268 &xmm_alpha, &xmm_alpha,
3269 &xmm_mask, &xmm_mask,
3270 &xmm_dst_lo, &xmm_dst_hi);
3271
3272 save_128_aligned (
3273 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3274
3275 dst += 4;
3276 src += 4;
3277 w -= 4;
3278
3279 }
3280
3281 while (w)
3282 {
3283 uint32_t s = (*src++) | 0xff000000;
3284 uint32_t d = *dst;
3285
3286 __m64 src = unpack_32_1x64 (s);
3287 __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
3288 __m64 mask = _mm_movepi64_pi64 (xmm_mask);
3289 __m64 dest = unpack_32_1x64 (d);
3290
3291 *dst++ = pack_1x64_32 (
3292 in_over_1x64 (&src, &alpha, &mask, &dest));
3293
3294 w--;
3295 }
3296 }
3297
3298 _mm_empty ();
3299}
3300
3301/* --------------------------------------------------------------------
3302 * composite_over_8888_8888
3303 */
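/*
 * a8r8g8b8 OVER a8r8g8b8 with no mask: each row is handed to
 * core_combine_over_u_sse2 with a NULL mask pointer.
 */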
3304static void
3305sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
3306 pixman_op_t op,
3307 pixman_image_t * src_image,
3308 pixman_image_t * mask_image,
3309 pixman_image_t * dst_image,
3310 int32_t src_x,
3311 int32_t src_y,
3312 int32_t mask_x,
3313 int32_t mask_y,
3314 int32_t dest_x,
3315 int32_t dest_y,
3316 int32_t width,
3317 int32_t height)
3318{
3319 int dst_stride, src_stride;
3320 uint32_t *dst_line, *dst;
3321 uint32_t *src_line, *src;
3322
3323    PIXMAN_IMAGE_GET_LINE (
3324        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3325    PIXMAN_IMAGE_GET_LINE (
3326        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3327
3328 dst = dst_line;
3329 src = src_line;
3330
3331 while (height--)
3332 {
3333        core_combine_over_u_sse2 (dst, src, NULL, width);
3334
3335 dst += dst_stride;
3336 src += src_stride;
3337 }
3338 _mm_empty ();
3339}
3340
3341/* ------------------------------------------------------------------
3342 * composite_over_8888_0565
3343 */
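/*
 * a8r8g8b8 source OVER an r5g6b5 destination.  Destination pixels are widened
 * from 565 to 8888, blended with over_1x64 / over_2x128, and packed back to
 * 565; the vector loop consumes 8 destination pixels (one 128-bit load) per
 * iteration.
 */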
3344static force_inline uint16_t
3345composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
3346{
3347 __m64 ms;
3348
3349 ms = unpack_32_1x64 (src);
3350 return pack_565_32_16 (
3351 pack_1x64_32 (
3352 over_1x64 (
3353 ms, expand_alpha_1x64 (ms), expand565_16_1x64 (dst))));
3354}
3355
3356static void
3357sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
3358 pixman_op_t op,
3359 pixman_image_t * src_image,
3360 pixman_image_t * mask_image,
3361 pixman_image_t * dst_image,
3362 int32_t src_x,
3363 int32_t src_y,
3364 int32_t mask_x,
3365 int32_t mask_y,
3366 int32_t dest_x,
3367 int32_t dest_y,
3368 int32_t width,
3369 int32_t height)
3370{
3371 uint16_t *dst_line, *dst, d;
3372 uint32_t *src_line, *src, s;
3373 int dst_stride, src_stride;
3374 int32_t w;
3375
3376 __m128i xmm_alpha_lo, xmm_alpha_hi;
3377 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
3378 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3379
3380    PIXMAN_IMAGE_GET_LINE (
3381        dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
3382    PIXMAN_IMAGE_GET_LINE (
3383        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
3384
3385#if 0
3386 /* FIXME
3387 *
3388 * This code was copied from the MMX implementation, FIXME and all.
3389 * If it's a problem there, it's probably a problem here too.
3390 */
3391    assert (src_image->drawable == mask_image->drawable);
3392#endif
3393
3394 while (height--)
3395 {
3396 dst = dst_line;
3397 src = src_line;
3398
3399 dst_line += dst_stride;
3400 src_line += src_stride;
3401 w = width;
3402
3403 /* Align dst on a 16-byte boundary */
3404 while (w &&
3405 ((unsigned long)dst & 15))
3406 {
3407 s = *src++;
3408 d = *dst;
3409
3410 *dst++ = composite_over_8888_0565pixel (s, d);
3411 w--;
3412 }
3413
3414        /* This is an 8-pixel loop */
3415 while (w >= 8)
3416 {
3417 /* I'm loading unaligned because I'm not sure
3418 * about the address alignment.
3419 */
3420 xmm_src = load_128_unaligned ((__m128i*) src);
3421 xmm_dst = load_128_aligned ((__m128i*) dst);
3422
3423 /* Unpacking */
3424 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3425 unpack_565_128_4x128 (xmm_dst,
3426 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3427 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3428 &xmm_alpha_lo, &xmm_alpha_hi);
3429
3430            /* Load the next 4 pixels from memory
3431             * ahead of time to optimize the memory read.
3432             */
3433 xmm_src = load_128_unaligned ((__m128i*) (src + 4));
3434
3435 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3436 &xmm_alpha_lo, &xmm_alpha_hi,
3437 &xmm_dst0, &xmm_dst1);
3438
3439 /* Unpacking */
3440 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
3441 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
3442 &xmm_alpha_lo, &xmm_alpha_hi);
3443
3444 over_2x128 (&xmm_src_lo, &xmm_src_hi,
3445 &xmm_alpha_lo, &xmm_alpha_hi,
3446 &xmm_dst2, &xmm_dst3);
3447
3448 save_128_aligned (
3449 (__m128i*)dst, pack_565_4x128_128 (
3450 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
3451
3452 w -= 8;
3453 dst += 8;
3454 src += 8;
3455 }
3456
3457 while (w--)
3458 {
3459 s = *src++;
3460 d = *dst;
3461
3462 *dst++ = composite_over_8888_0565pixel (s, d);
3463 }
3464 }
3465
3466 _mm_empty ();
3467}
3468
3469/* -----------------------------------------------------------------
3470 * composite_over_n_8_8888
3471 */
3472
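/*
 * Solid source with an a8 mask, OVER an a8r8g8b8 destination.  The 4-wide loop
 * reads four mask bytes at once: 0xffffffff with an opaque source stores the
 * precomputed solid word directly, while a zero word leaves the destination
 * untouched.
 */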
3473static void
3474sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
3475 pixman_op_t op,
3476 pixman_image_t * src_image,
3477 pixman_image_t * mask_image,
3478 pixman_image_t * dst_image,
3479 int32_t src_x,
3480 int32_t src_y,
3481 int32_t mask_x,
3482 int32_t mask_y,
3483 int32_t dest_x,
3484 int32_t dest_y,
3485 int32_t width,
3486 int32_t height)
3487{
3488 uint32_t src, srca;
3489 uint32_t *dst_line, *dst;
3490 uint8_t *mask_line, *mask;
3491 int dst_stride, mask_stride;
3492 int32_t w;
3493 uint32_t m, d;
3494
3495 __m128i xmm_src, xmm_alpha, xmm_def;
3496 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
3497 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3498
3499 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3500
3501 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
3502
3503 srca = src >> 24;
3504 if (src == 0)
3505 return;
3506
3507    PIXMAN_IMAGE_GET_LINE (
3508        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3509    PIXMAN_IMAGE_GET_LINE (
3510        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3511
3512 xmm_def = create_mask_2x32_128 (src, src);
3513 xmm_src = expand_pixel_32_1x128 (src);
3514 xmm_alpha = expand_alpha_1x128 (xmm_src);
3515 mmx_src = _mm_movepi64_pi64 (xmm_src);
3516 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
3517
3518 while (height--)
3519 {
3520 dst = dst_line;
3521 dst_line += dst_stride;
3522 mask = mask_line;
3523 mask_line += mask_stride;
3524 w = width;
3525
3526 while (w && (unsigned long)dst & 15)
3527 {
3528 uint8_t m = *mask++;
3529
3530 if (m)
3531 {
3532 d = *dst;
3533 mmx_mask = expand_pixel_8_1x64 (m);
3534 mmx_dest = unpack_32_1x64 (d);
3535
3536 *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
3537 &mmx_alpha,
3538 &mmx_mask,
3539 &mmx_dest));
3540 }
3541
3542 w--;
3543 dst++;
3544 }
3545
3546 while (w >= 4)
3547 {
3548 m = *((uint32_t*)mask);
3549
3550 if (srca == 0xff && m == 0xffffffff)
3551 {
3552 save_128_aligned ((__m128i*)dst, xmm_def);
3553 }
3554 else if (m)
3555 {
3556 xmm_dst = load_128_aligned ((__m128i*) dst);
3557 xmm_mask = unpack_32_1x128 (m);
3558 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3559
3560 /* Unpacking */
3561 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
3562 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3563
3564 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3565 &xmm_mask_lo, &xmm_mask_hi);
3566
3567 in_over_2x128 (&xmm_src, &xmm_src,
3568 &xmm_alpha, &xmm_alpha,
3569 &xmm_mask_lo, &xmm_mask_hi,
3570 &xmm_dst_lo, &xmm_dst_hi);
3571
3572 save_128_aligned (
3573 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
3574 }
3575
3576 w -= 4;
3577 dst += 4;
3578 mask += 4;
3579 }
3580
3581 while (w)
3582 {
3583 uint8_t m = *mask++;
3584
3585 if (m)
3586 {
3587 d = *dst;
3588 mmx_mask = expand_pixel_8_1x64 (m);
3589 mmx_dest = unpack_32_1x64 (d);
3590
3591 *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
3592 &mmx_alpha,
3593 &mmx_mask,
3594 &mmx_dest));
3595 }
3596
3597 w--;
3598 dst++;
3599 }
3600 }
3601
3602 _mm_empty ();
3603}
3604
3605/* ----------------------------------------------------------------
3606 * pixman_fill_sse2
3607 */
3608
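/*
 * SSE2 solid fill for 8, 16 and 32 bpp surfaces.  The fill value is replicated
 * to a full 32-bit word, then each row is written with progressively wider
 * stores: byte/word/dword until 16-byte alignment, 128-byte unrolled blocks,
 * and narrowing stores for the remainder.
 */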
3609pixman_bool_t
3610pixman_fill_sse2 (uint32_t *bits,
3611 int stride,
3612 int bpp,
3613 int x,
3614 int y,
3615 int width,
3616 int height,
3617 uint32_t data)
3618{
3619 uint32_t byte_width;
3620 uint8_t *byte_line;
3621
3622 __m128i xmm_def;
3623
3624 if (bpp == 8)
3625 {
3626 uint8_t b;
3627 uint16_t w;
3628
3629 stride = stride * (int) sizeof (uint32_t) / 1;
3630 byte_line = (uint8_t *)(((uint8_t *)bits) + stride * y + x);
3631 byte_width = width;
3632 stride *= 1;
3633
3634 b = data & 0xff;
3635 w = (b << 8) | b;
3636 data = (w << 16) | w;
3637 }
3638 else if (bpp == 16)
3639 {
3640 stride = stride * (int) sizeof (uint32_t) / 2;
3641 byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
3642 byte_width = 2 * width;
3643 stride *= 2;
3644
3645 data = (data & 0xffff) * 0x00010001;
3646 }
3647 else if (bpp == 32)
3648 {
3649 stride = stride * (int) sizeof (uint32_t) / 4;
3650 byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
3651 byte_width = 4 * width;
3652 stride *= 4;
3653 }
3654 else
3655 {
3656        return FALSE;
3657 }
3658
3659 xmm_def = create_mask_2x32_128 (data, data);
3660
3661 while (height--)
3662 {
3663 int w;
3664 uint8_t *d = byte_line;
3665 byte_line += stride;
3666 w = byte_width;
3667
3668 while (w >= 1 && ((unsigned long)d & 1))
3669 {
3670 *(uint8_t *)d = data;
3671 w -= 1;
3672 d += 1;
3673 }
3674
3675 while (w >= 2 && ((unsigned long)d & 3))
3676 {
3677 *(uint16_t *)d = data;
3678 w -= 2;
3679 d += 2;
3680 }
3681
3682 while (w >= 4 && ((unsigned long)d & 15))
3683 {
3684 *(uint32_t *)d = data;
3685
3686 w -= 4;
3687 d += 4;
3688 }
3689
3690 while (w >= 128)
3691 {
3692 save_128_aligned ((__m128i*)(d), xmm_def);
3693 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3694 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3695 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3696 save_128_aligned ((__m128i*)(d + 64), xmm_def);
3697 save_128_aligned ((__m128i*)(d + 80), xmm_def);
3698 save_128_aligned ((__m128i*)(d + 96), xmm_def);
3699 save_128_aligned ((__m128i*)(d + 112), xmm_def);
3700
3701 d += 128;
3702 w -= 128;
3703 }
3704
3705 if (w >= 64)
3706 {
3707 save_128_aligned ((__m128i*)(d), xmm_def);
3708 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3709 save_128_aligned ((__m128i*)(d + 32), xmm_def);
3710 save_128_aligned ((__m128i*)(d + 48), xmm_def);
3711
3712 d += 64;
3713 w -= 64;
3714 }
3715
3716 if (w >= 32)
3717 {
3718 save_128_aligned ((__m128i*)(d), xmm_def);
3719 save_128_aligned ((__m128i*)(d + 16), xmm_def);
3720
3721 d += 32;
3722 w -= 32;
3723 }
3724
3725 if (w >= 16)
3726 {
3727 save_128_aligned ((__m128i*)(d), xmm_def);
3728
3729 d += 16;
3730 w -= 16;
3731 }
3732
3733 while (w >= 4)
3734 {
3735 *(uint32_t *)d = data;
3736
3737 w -= 4;
3738 d += 4;
3739 }
3740
3741 if (w >= 2)
3742 {
3743 *(uint16_t *)d = data;
3744 w -= 2;
3745 d += 2;
3746 }
3747
3748 if (w >= 1)
3749 {
3750 *(uint8_t *)d = data;
3751 w -= 1;
3752 d += 1;
3753 }
3754 }
3755
3756 _mm_empty ();
3757    return TRUE;
3758}
3759
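/*
 * SRC with a solid source and an a8 mask: dest = src * mask per component, so
 * zero mask bytes store transparent black, and a fully opaque mask word with
 * an opaque source stores the solid color directly.
 */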
3760static void
3761sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
3762 pixman_op_t op,
3763 pixman_image_t * src_image,
3764 pixman_image_t * mask_image,
3765 pixman_image_t * dst_image,
3766 int32_t src_x,
3767 int32_t src_y,
3768 int32_t mask_x,
3769 int32_t mask_y,
3770 int32_t dest_x,
3771 int32_t dest_y,
3772 int32_t width,
3773 int32_t height)
3774{
3775 uint32_t src, srca;
3776 uint32_t *dst_line, *dst;
3777 uint8_t *mask_line, *mask;
3778 int dst_stride, mask_stride;
3779 int32_t w;
3780 uint32_t m;
3781
3782 __m128i xmm_src, xmm_def;
3783 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3784
3785 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
3786
3787 srca = src >> 24;
3788 if (src == 0)
3789 {
3790 pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride,
3791                          PIXMAN_FORMAT_BPP (dst_image->bits.format),
3792 dest_x, dest_y, width, height, 0);
3793 return;
3794 }
3795
3796    PIXMAN_IMAGE_GET_LINE (
3797        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
3798    PIXMAN_IMAGE_GET_LINE (
3799        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3800
3801 xmm_def = create_mask_2x32_128 (src, src);
3802 xmm_src = expand_pixel_32_1x128 (src);
3803
3804 while (height--)
3805 {
3806 dst = dst_line;
3807 dst_line += dst_stride;
3808 mask = mask_line;
3809 mask_line += mask_stride;
3810 w = width;
3811
3812 while (w && (unsigned long)dst & 15)
3813 {
3814 uint8_t m = *mask++;
3815
3816 if (m)
3817 {
3818 *dst = pack_1x64_32 (
3819 pix_multiply_1x64 (
3820 _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
3821 }
3822 else
3823 {
3824 *dst = 0;
3825 }
3826
3827 w--;
3828 dst++;
3829 }
3830
3831 while (w >= 4)
3832 {
3833 m = *((uint32_t*)mask);
3834
3835 if (srca == 0xff && m == 0xffffffff)
3836 {
3837 save_128_aligned ((__m128i*)dst, xmm_def);
3838 }
3839 else if (m)
3840 {
3841 xmm_mask = unpack_32_1x128 (m);
3842 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3843
3844 /* Unpacking */
3845 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3846
3847 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3848 &xmm_mask_lo, &xmm_mask_hi);
3849
3850 pix_multiply_2x128 (&xmm_src, &xmm_src,
3851 &xmm_mask_lo, &xmm_mask_hi,
3852 &xmm_mask_lo, &xmm_mask_hi);
3853
3854 save_128_aligned (
3855 (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
3856 }
3857 else
3858 {
3859 save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
3860 }
3861
3862 w -= 4;
3863 dst += 4;
3864 mask += 4;
3865 }
3866
3867 while (w)
3868 {
3869 uint8_t m = *mask++;
3870
3871 if (m)
3872 {
3873 *dst = pack_1x64_32 (
3874 pix_multiply_1x64 (
3875 _mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
3876 }
3877 else
3878 {
3879 *dst = 0;
3880 }
3881
3882 w--;
3883 dst++;
3884 }
3885 }
3886
3887 _mm_empty ();
3888}
3889
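Per pixel, sse2_composite_src_n_8_8888 above computes SRC with an a8 mask: every channel of the solid source, alpha included, is scaled by mask/255, and a zero mask writes zero. A scalar sketch of that arithmetic using the rounded x*y/255 idiom (the MUL_UN8 pattern from pixman-combine32.h); the helper names here are illustrative, not pixman's:

#include <stdint.h>

/* rounded (a * b) / 255 for a, b in [0, 255] */
static uint32_t
mul_div_255 (uint32_t a, uint32_t b)
{
    uint32_t t = a * b + 0x80;
    return (t + (t >> 8)) >> 8;
}

/* SRC with an 8-bit mask: every channel (alpha included) is scaled by m */
static uint32_t
src_n_8_8888_pixel (uint32_t src, uint8_t m)
{
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t s = (src >> shift) & 0xff;
        result |= mul_div_255 (s, m) << shift;
    }
    return result;
}
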
3890/*-----------------------------------------------------------------------
3891 * composite_over_n_8_0565
3892 */
3893
3894static void
3895sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
3896 pixman_op_t op,
3897 pixman_image_t * src_image,
3898 pixman_image_t * mask_image,
3899 pixman_image_t * dst_image,
3900 int32_t src_x,
3901 int32_t src_y,
3902 int32_t mask_x,
3903 int32_t mask_y,
3904 int32_t dest_x,
3905 int32_t dest_y,
3906 int32_t width,
3907 int32_t height)
3908{
3909 uint32_t src, srca;
3910 uint16_t *dst_line, *dst, d;
3911 uint8_t *mask_line, *mask;
3912 int dst_stride, mask_stride;
3913 int32_t w;
3914 uint32_t m;
3915 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
3916
3917 __m128i xmm_src, xmm_alpha;
3918 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
3919 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
3920
3921 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
3922
3923 srca = src >> 24;
3924 if (src == 0)
3925 return;
3926
3927    PIXMAN_IMAGE_GET_LINE (
3928        dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
3929    PIXMAN_IMAGE_GET_LINE (
3930        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
3931
3932 xmm_src = expand_pixel_32_1x128 (src);
3933 xmm_alpha = expand_alpha_1x128 (xmm_src);
3934 mmx_src = _mm_movepi64_pi64 (xmm_src);
3935 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
3936
3937 while (height--)
3938 {
3939 dst = dst_line;
3940 dst_line += dst_stride;
3941 mask = mask_line;
3942 mask_line += mask_stride;
3943 w = width;
3944
3945 while (w && (unsigned long)dst & 15)
3946 {
3947 m = *mask++;
3948
3949 if (m)
3950 {
3951 d = *dst;
3952 mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
3953 mmx_dest = expand565_16_1x64 (d);
3954
3955 *dst = pack_565_32_16 (
3956 pack_1x64_32 (
3957 in_over_1x64 (
3958 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
3959 }
3960
3961 w--;
3962 dst++;
3963 }
3964
3965 while (w >= 8)
3966 {
3967 xmm_dst = load_128_aligned ((__m128i*) dst);
3968 unpack_565_128_4x128 (xmm_dst,
3969 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
3970
3971 m = *((uint32_t*)mask);
3972 mask += 4;
3973
3974 if (m)
3975 {
3976 xmm_mask = unpack_32_1x128 (m);
3977 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3978
3979 /* Unpacking */
3980 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
3981
3982 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
3983 &xmm_mask_lo, &xmm_mask_hi);
3984
3985 in_over_2x128 (&xmm_src, &xmm_src,
3986 &xmm_alpha, &xmm_alpha,
3987 &xmm_mask_lo, &xmm_mask_hi,
3988 &xmm_dst0, &xmm_dst1);
3989 }
3990
3991 m = *((uint32_t*)mask);
3992 mask += 4;
3993
3994 if (m)
3995 {
3996 xmm_mask = unpack_32_1x128 (m);
3997 xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
3998
3999 /* Unpacking */
4000 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4001
4002 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
4003 &xmm_mask_lo, &xmm_mask_hi);
4004 in_over_2x128 (&xmm_src, &xmm_src,
4005 &xmm_alpha, &xmm_alpha,
4006 &xmm_mask_lo, &xmm_mask_hi,
4007 &xmm_dst2, &xmm_dst3);
4008 }
4009
4010 save_128_aligned (
4011 (__m128i*)dst, pack_565_4x128_128 (
4012 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4013
4014 w -= 8;
4015 dst += 8;
4016 }
4017
4018 while (w)
4019 {
4020 m = *mask++;
4021
4022 if (m)
4023 {
4024 d = *dst;
4025 mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
4026 mmx_dest = expand565_16_1x64 (d);
4027
4028 *dst = pack_565_32_16 (
4029 pack_1x64_32 (
4030 in_over_1x64 (
4031 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4032 }
4033
4034 w--;
4035 dst++;
4036 }
4037 }
4038
4039 _mm_empty ();
4040}
4041
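The in_over_1x64/in_over_2x128 helpers used above implement "(src IN mask) OVER dest" on unpacked 8-bit channels. A scalar sketch of the per-channel arithmetic for one premultiplied a8r8g8b8 pixel; the function names are illustrative and the code is a reference model, not the SIMD implementation:

#include <stdint.h>

/* rounded (a * b) / 255 for a, b in [0, 255] */
static uint32_t
mul_div_255 (uint32_t a, uint32_t b)
{
    uint32_t t = a * b + 0x80;
    return (t + (t >> 8)) >> 8;
}

/* (src IN mask) OVER dest, per channel, premultiplied alpha */
static uint32_t
in_over_pixel (uint32_t src, uint8_t mask, uint32_t dst)
{
    uint32_t srca   = src >> 24;
    uint32_t result = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
    {
        uint32_t s = mul_div_255 ((src >> shift) & 0xff, mask);  /* IN        */
        uint32_t d = (dst >> shift) & 0xff;
        uint32_t a = 255 - mul_div_255 (srca, mask);             /* 1 - m*sa  */

        result |= (s + mul_div_255 (d, a)) << shift;             /* OVER      */
    }
    return result;
}
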
4042/* -----------------------------------------------------------------------
4043 * composite_over_pixbuf_0565
4044 */
4045
4046static void
4047sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
4048 pixman_op_t op,
4049 pixman_image_t * src_image,
4050 pixman_image_t * mask_image,
4051 pixman_image_t * dst_image,
4052 int32_t src_x,
4053 int32_t src_y,
4054 int32_t mask_x,
4055 int32_t mask_y,
4056 int32_t dest_x,
4057 int32_t dest_y,
4058 int32_t width,
4059 int32_t height)
4060{
4061 uint16_t *dst_line, *dst, d;
4062 uint32_t *src_line, *src, s;
4063 int dst_stride, src_stride;
4064 int32_t w;
4065 uint32_t opaque, zero;
4066
4067 __m64 ms;
4068 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
4069 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
4070
4071    PIXMAN_IMAGE_GET_LINE (
4072        dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
4073    PIXMAN_IMAGE_GET_LINE (
4074        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
4075
4076#if 0
4077 /* FIXME
4078 *
4079 * This code was copied from the MMX implementation, keeping its FIXME:
4080 * if it's a problem there, it's probably a problem here.
4081 */
4082    assert (src_image->drawable == mask_image->drawable);
4083#endif
4084
4085 while (height--)
4086 {
4087 dst = dst_line;
4088 dst_line += dst_stride;
4089 src = src_line;
4090 src_line += src_stride;
4091 w = width;
4092
4093 while (w && (unsigned long)dst & 15)
4094 {
4095 s = *src++;
4096 d = *dst;
4097
4098 ms = unpack_32_1x64 (s);
4099
4100 *dst++ = pack_565_32_16 (
4101 pack_1x64_32 (
4102 over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
4103 w--;
4104 }
4105
4106 while (w >= 8)
4107 {
4108 /* First round */
4109 xmm_src = load_128_unaligned ((__m128i*)src);
4110 xmm_dst = load_128_aligned ((__m128i*)dst);
4111
4112 opaque = is_opaque (xmm_src);
4113 zero = is_zero (xmm_src);
4114
4115 unpack_565_128_4x128 (xmm_dst,
4116 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
4117 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4118
4119            /* preload next round */
4120 xmm_src = load_128_unaligned ((__m128i*)(src + 4));
4121
4122 if (opaque)
4123 {
4124 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
4125 &xmm_dst0, &xmm_dst1);
4126 }
4127 else if (!zero)
4128 {
4129 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
4130 &xmm_dst0, &xmm_dst1);
4131 }
4132
4133 /* Second round */
4134 opaque = is_opaque (xmm_src);
4135 zero = is_zero (xmm_src);
4136
4137 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4138
4139 if (opaque)
4140 {
4141 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
4142 &xmm_dst2, &xmm_dst3);
4143 }
4144 else if (!zero)
4145 {
4146 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
4147 &xmm_dst2, &xmm_dst3);
4148 }
4149
4150 save_128_aligned (
4151 (__m128i*)dst, pack_565_4x128_128 (
4152 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4153
4154 w -= 8;
4155 src += 8;
4156 dst += 8;
4157 }
4158
4159 while (w)
4160 {
4161 s = *src++;
4162 d = *dst;
4163
4164 ms = unpack_32_1x64 (s);
4165
4166 *dst++ = pack_565_32_16 (
4167 pack_1x64_32 (
4168 over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
4169 w--;
4170 }
4171 }
4172
4173 _mm_empty ();
4174}
4175
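Both r5g6b5 paths above expand 565 pixels to 8-bit channels before blending and pack the result back down (expand565_16_1x64 / pack_565_32_16 and their 4x128 variants do this in SIMD registers). A scalar sketch of the two conversions; the bit-replicating expansion shown is the common convention and an assumption, not copied from pixman's helpers:

#include <stdint.h>

static uint32_t
expand_565_to_8888_sketch (uint16_t p)
{
    uint32_t r = (p >> 11) & 0x1f;
    uint32_t g = (p >> 5)  & 0x3f;
    uint32_t b =  p        & 0x1f;

    /* replicate the top bits into the low bits to fill 8-bit channels */
    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);

    return 0xff000000 | (r << 16) | (g << 8) | b;
}

static uint16_t
pack_8888_to_565_sketch (uint32_t p)
{
    return (uint16_t) (((p >> 8) & 0xf800) |   /* top 5 bits of red   */
                       ((p >> 5) & 0x07e0) |   /* top 6 bits of green */
                       ((p >> 3) & 0x001f));   /* top 5 bits of blue  */
}
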
4176/* -------------------------------------------------------------------------
4177 * composite_over_pixbuf_8888
4178 */
4179
4180static void
4181sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
4182 pixman_op_t op,
4183 pixman_image_t * src_image,
4184 pixman_image_t * mask_image,
4185 pixman_image_t * dst_image,
4186 int32_t src_x,
4187 int32_t src_y,
4188 int32_t mask_x,
4189 int32_t mask_y,
4190 int32_t dest_x,
4191 int32_t dest_y,
4192 int32_t width,
4193 int32_t height)
4194{
4195 uint32_t *dst_line, *dst, d;
4196 uint32_t *src_line, *src, s;
4197 int dst_stride, src_stride;
4198 int32_t w;
4199 uint32_t opaque, zero;
4200
4201 __m128i xmm_src_lo, xmm_src_hi;
4202 __m128i xmm_dst_lo, xmm_dst_hi;
4203
4204    PIXMAN_IMAGE_GET_LINE (
4205        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
4206    PIXMAN_IMAGE_GET_LINE (
4207        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
4208
4209#if 0
4210 /* FIXME
4211 *
4212 * This code was copied from the MMX implementation, keeping its FIXME:
4213 * if it's a problem there, it's probably a problem here.
4214 */
4215    assert (src_image->drawable == mask_image->drawable);
4216#endif
4217
4218 while (height--)
4219 {
4220 dst = dst_line;
4221 dst_line += dst_stride;
4222 src = src_line;
4223 src_line += src_stride;
4224 w = width;
4225
4226 while (w && (unsigned long)dst & 15)
4227 {
4228 s = *src++;
4229 d = *dst;
4230
4231 *dst++ = pack_1x64_32 (
4232 over_rev_non_pre_1x64 (
4233 unpack_32_1x64 (s), unpack_32_1x64 (d)));
4234
4235 w--;
4236 }
4237
4238 while (w >= 4)
4239 {
4240 xmm_src_hi = load_128_unaligned ((__m128i*)src);
4241
4242 opaque = is_opaque (xmm_src_hi);
4243 zero = is_zero (xmm_src_hi);
4244
4245 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
4246
4247 if (opaque)
4248 {
4249 invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
4250 &xmm_dst_lo, &xmm_dst_hi);
4251
4252 save_128_aligned (
4253 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4254 }
4255 else if (!zero)
4256 {
4257 xmm_dst_hi = load_128_aligned ((__m128i*)dst);
4258
4259 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
4260
4261 over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
4262 &xmm_dst_lo, &xmm_dst_hi);
4263
4264 save_128_aligned (
4265 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4266 }
4267
4268 w -= 4;
4269 dst += 4;
4270 src += 4;
4271 }
4272
4273 while (w)
4274 {
4275 s = *src++;
4276 d = *dst;
4277
4278 *dst++ = pack_1x64_32 (
4279 over_rev_non_pre_1x64 (
4280 unpack_32_1x64 (s), unpack_32_1x64 (d)));
4281
4282 w--;
4283 }
4284 }
4285
4286 _mm_empty ();
4287}
4288
4289/* -------------------------------------------------------------------------------------------------
4290 * composite_over_n_8888_0565_ca
4291 */
4292
4293static void
4294sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
4295 pixman_op_t op,
4296 pixman_image_t * src_image,
4297 pixman_image_t * mask_image,
4298 pixman_image_t * dst_image,
4299 int32_t src_x,
4300 int32_t src_y,
4301 int32_t mask_x,
4302 int32_t mask_y,
4303 int32_t dest_x,
4304 int32_t dest_y,
4305 int32_t width,
4306 int32_t height)
4307{
4308 uint32_t src;
4309 uint16_t *dst_line, *dst, d;
4310 uint32_t *mask_line, *mask, m;
4311 int dst_stride, mask_stride;
4312 int w;
4313 uint32_t pack_cmp;
4314
4315 __m128i xmm_src, xmm_alpha;
4316 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4317 __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
4318
4319 __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
4320
4321 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4322
4323 if (src == 0)
4324 return;
4325
4326    PIXMAN_IMAGE_GET_LINE (
4327        dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
4328    PIXMAN_IMAGE_GET_LINE (
4329        mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
4330
4331 xmm_src = expand_pixel_32_1x128 (src);
4332 xmm_alpha = expand_alpha_1x128 (xmm_src);
4333 mmx_src = _mm_movepi64_pi64 (xmm_src);
4334 mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
4335
4336 while (height--)
4337 {
4338 w = width;
4339 mask = mask_line;
4340 dst = dst_line;
4341 mask_line += mask_stride;
4342 dst_line += dst_stride;
4343
4344 while (w && ((unsigned long)dst & 15))
4345 {
4346 m = *(uint32_t *) mask;
4347
4348 if (m)
4349 {
4350 d = *dst;
4351 mmx_mask = unpack_32_1x64 (m);
4352 mmx_dest = expand565_16_1x64 (d);
4353
4354 *dst = pack_565_32_16 (
4355 pack_1x64_32 (
4356 in_over_1x64 (
4357 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4358 }
4359
4360 w--;
4361 dst++;
4362 mask++;
4363 }
4364
4365 while (w >= 8)
4366 {
4367 /* First round */
4368 xmm_mask = load_128_unaligned ((__m128i*)mask);
4369 xmm_dst = load_128_aligned ((__m128i*)dst);
4370
4371 pack_cmp = _mm_movemask_epi8 (
4372 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
4373
4374 unpack_565_128_4x128 (xmm_dst,
4375 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
4376 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4377
4378 /* preload next round */
4379 xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
4380
4381            /* first four pixels: skip the blend if all four mask values are zero */
4382 if (pack_cmp != 0xffff)
4383 {
4384 in_over_2x128 (&xmm_src, &xmm_src,
4385 &xmm_alpha, &xmm_alpha,
4386 &xmm_mask_lo, &xmm_mask_hi,
4387 &xmm_dst0, &xmm_dst1);
4388 }
4389
4390 /* Second round */
4391 pack_cmp = _mm_movemask_epi8 (
4392 _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
4393
4394 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4395
4396 if (pack_cmp != 0xffff)
4397 {
4398 in_over_2x128 (&xmm_src, &xmm_src,
4399 &xmm_alpha, &xmm_alpha,
4400 &xmm_mask_lo, &xmm_mask_hi,
4401 &xmm_dst2, &xmm_dst3);
4402 }
4403
4404 save_128_aligned (
4405 (__m128i*)dst, pack_565_4x128_128 (
4406 &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
4407
4408 w -= 8;
4409 dst += 8;
4410 mask += 8;
4411 }
4412
4413 while (w)
4414 {
4415 m = *(uint32_t *) mask;
4416
4417 if (m)
4418 {
4419 d = *dst;
4420 mmx_mask = unpack_32_1x64 (m);
4421 mmx_dest = expand565_16_1x64 (d);
4422
4423 *dst = pack_565_32_16 (
4424 pack_1x64_32 (
4425 in_over_1x64 (
4426 &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
4427 }
4428
4429 w--;
4430 dst++;
4431 mask++;
4432 }
4433 }
4434
4435 _mm_empty ();
4436}
4437
4438/* -----------------------------------------------------------------------
4439 * composite_in_n_8_8
4440 */
4441
4442static void
4443sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
4444 pixman_op_t op,
4445 pixman_image_t * src_image,
4446 pixman_image_t * mask_image,
4447 pixman_image_t * dst_image,
4448 int32_t src_x,
4449 int32_t src_y,
4450 int32_t mask_x,
4451 int32_t mask_y,
4452 int32_t dest_x,
4453 int32_t dest_y,
4454 int32_t width,
4455 int32_t height)
4456{
4457 uint8_t *dst_line, *dst;
4458 uint8_t *mask_line, *mask;
4459 int dst_stride, mask_stride;
4460 uint32_t d, m;
4461 uint32_t src;
4462 uint8_t sa;
4463 int32_t w;
4464
4465 __m128i xmm_alpha;
4466 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4467 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4468
4469    PIXMAN_IMAGE_GET_LINE (
4470        dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4471    PIXMAN_IMAGE_GET_LINE (
4472        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
4473
4474 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4475
4476 sa = src >> 24;
4477
4478 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4479
4480 while (height--)
4481 {
4482 dst = dst_line;
4483 dst_line += dst_stride;
4484 mask = mask_line;
4485 mask_line += mask_stride;
4486 w = width;
4487
4488 while (w && ((unsigned long)dst & 15))
4489 {
4490 m = (uint32_t) *mask++;
4491 d = (uint32_t) *dst;
4492
4493 *dst++ = (uint8_t) pack_1x64_32 (
4494 pix_multiply_1x64 (
4495 pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha),
4496 unpack_32_1x64 (m)),
4497 unpack_32_1x64 (d)));
4498 w--;
4499 }
4500
4501 while (w >= 16)
4502 {
4503 xmm_mask = load_128_unaligned ((__m128i*)mask);
4504 xmm_dst = load_128_aligned ((__m128i*)dst);
4505
4506 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4507 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4508
4509 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4510 &xmm_mask_lo, &xmm_mask_hi,
4511 &xmm_mask_lo, &xmm_mask_hi);
4512
4513 pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
4514 &xmm_dst_lo, &xmm_dst_hi,
4515 &xmm_dst_lo, &xmm_dst_hi);
4516
4517 save_128_aligned (
4518 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4519
4520 mask += 16;
4521 dst += 16;
4522 w -= 16;
4523 }
4524
4525 while (w)
4526 {
4527 m = (uint32_t) *mask++;
4528 d = (uint32_t) *dst;
4529
4530 *dst++ = (uint8_t) pack_1x64_32 (
4531 pix_multiply_1x64 (
4532 pix_multiply_1x64 (
4533 _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
4534 unpack_32_1x64 (d)));
4535 w--;
4536 }
4537 }
4538
4539 _mm_empty ();
4540}
4541
4542/* -----------------------------------------------------------------------
4543 * composite_in_n_8
4544 */
4545
4546static void
4547sse2_composite_in_n_8 (pixman_implementation_t *imp,
4548 pixman_op_t op,
4549 pixman_image_t * src_image,
4550 pixman_image_t * mask_image,
4551 pixman_image_t * dst_image,
4552 int32_t src_x,
4553 int32_t src_y,
4554 int32_t mask_x,
4555 int32_t mask_y,
4556 int32_t dest_x,
4557 int32_t dest_y,
4558 int32_t width,
4559 int32_t height)
4560{
4561 uint8_t *dst_line, *dst;
4562 int dst_stride;
4563 uint32_t d;
4564 uint32_t src;
4565 int32_t w;
4566
4567 __m128i xmm_alpha;
4568 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4569
4570    PIXMAN_IMAGE_GET_LINE (
4571        dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4572
4573 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4574
4575 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4576
4577 src = src >> 24;
4578
4579 if (src == 0xff)
4580 return;
4581
4582 if (src == 0x00)
4583 {
4584 pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
4585 8, dest_x, dest_y, width, height, src);
4586
4587 return;
4588 }
4589
4590 while (height--)
4591 {
4592 dst = dst_line;
4593 dst_line += dst_stride;
4594 w = width;
4595
4596 while (w && ((unsigned long)dst & 15))
4597 {
4598 d = (uint32_t) *dst;
4599
4600 *dst++ = (uint8_t) pack_1x64_32 (
4601 pix_multiply_1x64 (
4602 _mm_movepi64_pi64 (xmm_alpha),
4603 unpack_32_1x64 (d)));
4604 w--;
4605 }
4606
4607 while (w >= 16)
4608 {
4609 xmm_dst = load_128_aligned ((__m128i*)dst);
4610
4611 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4612
4613 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4614 &xmm_dst_lo, &xmm_dst_hi,
4615 &xmm_dst_lo, &xmm_dst_hi);
4616
4617 save_128_aligned (
4618 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4619
4620 dst += 16;
4621 w -= 16;
4622 }
4623
4624 while (w)
4625 {
4626 d = (uint32_t) *dst;
4627
4628 *dst++ = (uint8_t) pack_1x64_32 (
4629 pix_multiply_1x64 (
4630 _mm_movepi64_pi64 (xmm_alpha),
4631 unpack_32_1x64 (d)));
4632 w--;
4633 }
4634 }
4635
4636 _mm_empty ();
4637}
4638
4639/* ---------------------------------------------------------------------------
4640 * composite_in_8_8
4641 */
4642
4643static void
4644sse2_composite_in_8_8 (pixman_implementation_t *imp,
4645 pixman_op_t op,
4646 pixman_image_t * src_image,
4647 pixman_image_t * mask_image,
4648 pixman_image_t * dst_image,
4649 int32_t src_x,
4650 int32_t src_y,
4651 int32_t mask_x,
4652 int32_t mask_y,
4653 int32_t dest_x,
4654 int32_t dest_y,
4655 int32_t width,
4656 int32_t height)
4657{
4658 uint8_t *dst_line, *dst;
4659 uint8_t *src_line, *src;
4660 int src_stride, dst_stride;
4661 int32_t w;
4662 uint32_t s, d;
4663
4664 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
4665 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4666
4667    PIXMAN_IMAGE_GET_LINE (
4668        dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4669    PIXMAN_IMAGE_GET_LINE (
4670        src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
4671
4672 while (height--)
4673 {
4674 dst = dst_line;
4675 dst_line += dst_stride;
4676 src = src_line;
4677 src_line += src_stride;
4678 w = width;
4679
4680 while (w && ((unsigned long)dst & 15))
4681 {
4682 s = (uint32_t) *src++;
4683 d = (uint32_t) *dst;
4684
4685 *dst++ = (uint8_t) pack_1x64_32 (
4686 pix_multiply_1x64 (
4687 unpack_32_1x64 (s), unpack_32_1x64 (d)));
4688 w--;
4689 }
4690
4691 while (w >= 16)
4692 {
4693 xmm_src = load_128_unaligned ((__m128i*)src);
4694 xmm_dst = load_128_aligned ((__m128i*)dst);
4695
4696 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
4697 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4698
4699 pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
4700 &xmm_dst_lo, &xmm_dst_hi,
4701 &xmm_dst_lo, &xmm_dst_hi);
4702
4703 save_128_aligned (
4704 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4705
4706 src += 16;
4707 dst += 16;
4708 w -= 16;
4709 }
4710
4711 while (w)
4712 {
4713 s = (uint32_t) *src++;
4714 d = (uint32_t) *dst;
4715
4716 *dst++ = (uint8_t) pack_1x64_32 (
4717 pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d)));
4718 w--;
4719 }
4720 }
4721
4722 _mm_empty ();
4723}
4724
4725/* -------------------------------------------------------------------------
4726 * composite_add_n_8_8
4727 */
4728
4729static void
4730sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
4731 pixman_op_t op,
4732 pixman_image_t * src_image,
4733 pixman_image_t * mask_image,
4734 pixman_image_t * dst_image,
4735 int32_t src_x,
4736 int32_t src_y,
4737 int32_t mask_x,
4738 int32_t mask_y,
4739 int32_t dest_x,
4740 int32_t dest_y,
4741 int32_t width,
4742 int32_t height)
4743{
4744 uint8_t *dst_line, *dst;
4745 uint8_t *mask_line, *mask;
4746 int dst_stride, mask_stride;
4747 int32_t w;
4748 uint32_t src;
4749 uint8_t sa;
4750 uint32_t m, d;
4751
4752 __m128i xmm_alpha;
4753 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
4754 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
4755
4756    PIXMAN_IMAGE_GET_LINE (
4757        dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4758    PIXMAN_IMAGE_GET_LINE (
4759        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
4760
4761 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4762
4763 sa = src >> 24;
4764
4765 xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
4766
4767 while (height--)
4768 {
4769 dst = dst_line;
4770 dst_line += dst_stride;
4771 mask = mask_line;
4772 mask_line += mask_stride;
4773 w = width;
4774
4775 while (w && ((unsigned long)dst & 15))
4776 {
4777 m = (uint32_t) *mask++;
4778 d = (uint32_t) *dst;
4779
4780 *dst++ = (uint8_t) pack_1x64_32 (
4781 _mm_adds_pu16 (
4782 pix_multiply_1x64 (
4783 _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
4784 unpack_32_1x64 (d)));
4785 w--;
4786 }
4787
4788 while (w >= 16)
4789 {
4790 xmm_mask = load_128_unaligned ((__m128i*)mask);
4791 xmm_dst = load_128_aligned ((__m128i*)dst);
4792
4793 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
4794 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
4795
4796 pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
4797 &xmm_mask_lo, &xmm_mask_hi,
4798 &xmm_mask_lo, &xmm_mask_hi);
4799
4800 xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
4801 xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
4802
4803 save_128_aligned (
4804 (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
4805
4806 mask += 16;
4807 dst += 16;
4808 w -= 16;
4809 }
4810
4811 while (w)
4812 {
4813 m = (uint32_t) *mask++;
4814 d = (uint32_t) *dst;
4815
4816 *dst++ = (uint8_t) pack_1x64_32 (
4817 _mm_adds_pu16 (
4818 pix_multiply_1x64 (
4819 _mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
4820 unpack_32_1x64 (d)));
4821
4822 w--;
4823 }
4824 }
4825
4826 _mm_empty ();
4827}
4828
4829/* -------------------------------------------------------------------------
4830 * composite_add_n_8
4831 */
4832
4833static void
4834sse2_composite_add_n_8 (pixman_implementation_t *imp,
4835 pixman_op_t op,
4836 pixman_image_t * src_image,
4837 pixman_image_t * mask_image,
4838 pixman_image_t * dst_image,
4839 int32_t src_x,
4840 int32_t src_y,
4841 int32_t mask_x,
4842 int32_t mask_y,
4843 int32_t dest_x,
4844 int32_t dest_y,
4845 int32_t width,
4846 int32_t height)
4847{
4848 uint8_t *dst_line, *dst;
4849 int dst_stride;
4850 int32_t w;
4851 uint32_t src;
4852
4853 __m128i xmm_src;
4854
4855    PIXMAN_IMAGE_GET_LINE (
4856        dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4857
4858 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
4859
4860 src >>= 24;
4861
4862 if (src == 0x00)
4863 return;
4864
4865 if (src == 0xff)
4866 {
4867 pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
4868 8, dest_x, dest_y, width, height, 0xff);
4869
4870 return;
4871 }
4872
4873 src = (src << 24) | (src << 16) | (src << 8) | src;
4874 xmm_src = _mm_set_epi32 (src, src, src, src);
4875
4876 while (height--)
4877 {
4878 dst = dst_line;
4879 dst_line += dst_stride;
4880 w = width;
4881
4882 while (w && ((unsigned long)dst & 15))
4883 {
4884 *dst = (uint8_t)_mm_cvtsi64_si32 (
4885 _mm_adds_pu8 (
4886 _mm_movepi64_pi64 (xmm_src),
4887 _mm_cvtsi32_si64 (*dst)));
4888
4889 w--;
4890 dst++;
4891 }
4892
4893 while (w >= 16)
4894 {
4895 save_128_aligned (
4896 (__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned ((__m128i*)dst)));
4897
4898 dst += 16;
4899 w -= 16;
4900 }
4901
4902 while (w)
4903 {
4904 *dst = (uint8_t)_mm_cvtsi64_si32 (
4905 _mm_adds_pu8 (
4906 _mm_movepi64_pi64 (xmm_src),
4907 _mm_cvtsi32_si64 (*dst)));
4908
4909 w--;
4910 dst++;
4911 }
4912 }
4913
4914 _mm_empty ();
4915}
4916
4917/* ----------------------------------------------------------------------
4918 * composite_add_8_8
4919 */
4920
4921static void
4922sse2_composite_add_8_8 (pixman_implementation_t *imp,
4923 pixman_op_t op,
4924 pixman_image_t * src_image,
4925 pixman_image_t * mask_image,
4926 pixman_image_t * dst_image,
4927 int32_t src_x,
4928 int32_t src_y,
4929 int32_t mask_x,
4930 int32_t mask_y,
4931 int32_t dest_x,
4932 int32_t dest_y,
4933 int32_t width,
4934 int32_t height)
4935{
4936 uint8_t *dst_line, *dst;
4937 uint8_t *src_line, *src;
4938 int dst_stride, src_stride;
4939 int32_t w;
4940 uint16_t t;
4941
4942    PIXMAN_IMAGE_GET_LINE (
4943        src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
4944    PIXMAN_IMAGE_GET_LINE (
4945        dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
4946
4947 while (height--)
4948 {
4949 dst = dst_line;
4950 src = src_line;
4951
4952 dst_line += dst_stride;
4953 src_line += src_stride;
4954 w = width;
4955
4956 /* Small head */
4957 while (w && (unsigned long)dst & 3)
4958 {
4959 t = (*dst) + (*src++);
4960 *dst++ = t | (0 - (t >> 8));
4961 w--;
4962 }
4963
4964        core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
4965
4966 /* Small tail */
4967 dst += w & 0xfffc;
4968 src += w & 0xfffc;
4969
4970 w &= 3;
4971
4972 while (w)
4973 {
4974 t = (*dst) + (*src++);
4975 *dst++ = t | (0 - (t >> 8));
4976 w--;
4977 }
4978 }
4979
4980 _mm_empty ();
4981}
4982
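The scalar head and tail above clamp with a branch-free trick: the 9-bit sum t sets bit 8 on overflow, so 0 - (t >> 8) becomes 0xff in the low byte and OR-ing it in saturates the result, matching what _mm_adds_epu8/_mm_adds_epu16 do in the SIMD body. A stand-alone sketch:

#include <stdint.h>

/* branch-free unsigned saturating add of two 8-bit values */
static uint8_t
add_saturate_u8 (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t) a + b;          /* 9-bit intermediate sum               */
    return (uint8_t) (t | (0 - (t >> 8)));  /* on overflow, OR in 0xff and clamp    */
}
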
4983/* ---------------------------------------------------------------------
4984 * composite_add_8888_8888
4985 */
4986static void
4987sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
4988 pixman_op_t op,
4989 pixman_image_t * src_image,
4990 pixman_image_t * mask_image,
4991 pixman_image_t * dst_image,
4992 int32_t src_x,
4993 int32_t src_y,
4994 int32_t mask_x,
4995 int32_t mask_y,
4996 int32_t dest_x,
4997 int32_t dest_y,
4998 int32_t width,
4999 int32_t height)
5000{
5001 uint32_t *dst_line, *dst;
5002 uint32_t *src_line, *src;
5003 int dst_stride, src_stride;
5004
5005    PIXMAN_IMAGE_GET_LINE (
5006        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
5007    PIXMAN_IMAGE_GET_LINE (
5008        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
5009
5010 while (height--)
5011 {
5012 dst = dst_line;
5013 dst_line += dst_stride;
5014 src = src_line;
5015 src_line += src_stride;
5016
5017        core_combine_add_u_sse2 (dst, src, NULL, width);
5018 }
5019
5020 _mm_empty ();
5021}
5022
5023/* -------------------------------------------------------------------------------------------------
5024 * sse2_composite_copy_area
5025 */
5026
5027static pixman_bool_t
5028pixman_blt_sse2 (uint32_t *src_bits,
5029 uint32_t *dst_bits,
5030 int src_stride,
5031 int dst_stride,
5032 int src_bpp,
5033 int dst_bpp,
5034 int src_x,
5035 int src_y,
5036 int dst_x,
5037 int dst_y,
5038 int width,
5039 int height)
5040{
5041 uint8_t * src_bytes;
5042 uint8_t * dst_bytes;
5043 int byte_width;
5044
5045 if (src_bpp != dst_bpp)
5046        return FALSE;
5047
5048 if (src_bpp == 16)
5049 {
5050 src_stride = src_stride * (int) sizeof (uint32_t) / 2;
5051 dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
5052 src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
5053 dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
5054 byte_width = 2 * width;
5055 src_stride *= 2;
5056 dst_stride *= 2;
5057 }
5058 else if (src_bpp == 32)
5059 {
5060 src_stride = src_stride * (int) sizeof (uint32_t) / 4;
5061 dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
5062 src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
5063 dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
5064 byte_width = 4 * width;
5065 src_stride *= 4;
5066 dst_stride *= 4;
5067 }
5068 else
5069 {
5070        return FALSE;
5071 }
5072
5073 while (height--)
5074 {
5075 int w;
5076 uint8_t *s = src_bytes;
5077 uint8_t *d = dst_bytes;
5078 src_bytes += src_stride;
5079 dst_bytes += dst_stride;
5080 w = byte_width;
5081
5082 while (w >= 2 && ((unsigned long)d & 3))
5083 {
5084 *(uint16_t *)d = *(uint16_t *)s;
5085 w -= 2;
5086 s += 2;
5087 d += 2;
5088 }
5089
5090 while (w >= 4 && ((unsigned long)d & 15))
5091 {
5092 *(uint32_t *)d = *(uint32_t *)s;
5093
5094 w -= 4;
5095 s += 4;
5096 d += 4;
5097 }
5098
5099 while (w >= 64)
5100 {
5101 __m128i xmm0, xmm1, xmm2, xmm3;
5102
5103 xmm0 = load_128_unaligned ((__m128i*)(s));
5104 xmm1 = load_128_unaligned ((__m128i*)(s + 16));
5105 xmm2 = load_128_unaligned ((__m128i*)(s + 32));
5106 xmm3 = load_128_unaligned ((__m128i*)(s + 48));
5107
5108 save_128_aligned ((__m128i*)(d), xmm0);
5109 save_128_aligned ((__m128i*)(d + 16), xmm1);
5110 save_128_aligned ((__m128i*)(d + 32), xmm2);
5111 save_128_aligned ((__m128i*)(d + 48), xmm3);
5112
5113 s += 64;
5114 d += 64;
5115 w -= 64;
5116 }
5117
5118 while (w >= 16)
5119 {
5120 save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
5121
5122 w -= 16;
5123 d += 16;
5124 s += 16;
5125 }
5126
5127 while (w >= 4)
5128 {
5129 *(uint32_t *)d = *(uint32_t *)s;
5130
5131 w -= 4;
5132 s += 4;
5133 d += 4;
5134 }
5135
5136 if (w >= 2)
5137 {
5138 *(uint16_t *)d = *(uint16_t *)s;
5139 w -= 2;
5140 s += 2;
5141 d += 2;
5142 }
5143 }
5144
5145 _mm_empty ();
5146
5147    return TRUE;
5148}
5149
5150static void
5151sse2_composite_copy_area (pixman_implementation_t *imp,
5152 pixman_op_t op,
5153 pixman_image_t * src_image,
5154 pixman_image_t * mask_image,
5155 pixman_image_t * dst_image,
5156 int32_t src_x,
5157 int32_t src_y,
5158 int32_t mask_x,
5159 int32_t mask_y,
5160 int32_t dest_x,
5161 int32_t dest_y,
5162 int32_t width,
5163 int32_t height)
5164{
5165 pixman_blt_sse2 (src_image->bits.bits,
5166 dst_image->bits.bits,
5167 src_image->bits.rowstride,
5168 dst_image->bits.rowstride,
5169                     PIXMAN_FORMAT_BPP (src_image->bits.format),
5170                     PIXMAN_FORMAT_BPP (dst_image->bits.format),
5171 src_x, src_y, dest_x, dest_y, width, height);
5172}
5173
5174static void
5175sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
5176 pixman_op_t op,
5177 pixman_image_t * src_image,
5178 pixman_image_t * mask_image,
5179 pixman_image_t * dst_image,
5180 int32_t src_x,
5181 int32_t src_y,
5182 int32_t mask_x,
5183 int32_t mask_y,
5184 int32_t dest_x,
5185 int32_t dest_y,
5186 int32_t width,
5187 int32_t height)
5188{
5189 uint32_t *src, *src_line, s;
5190 uint32_t *dst, *dst_line, d;
5191 uint8_t *mask, *mask_line;
5192 uint32_t m;
5193 int src_stride, mask_stride, dst_stride;
5194 int32_t w;
5195 __m64 ms;
5196
5197 __m128i xmm_src, xmm_src_lo, xmm_src_hi;
5198 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5199 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5200
5201    PIXMAN_IMAGE_GET_LINE (
5202        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
5203    PIXMAN_IMAGE_GET_LINE (
5204        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
5205    PIXMAN_IMAGE_GET_LINE (
5206        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
5207
5208 while (height--)
5209 {
5210 src = src_line;
5211 src_line += src_stride;
5212 dst = dst_line;
5213 dst_line += dst_stride;
5214 mask = mask_line;
5215 mask_line += mask_stride;
5216
5217 w = width;
5218
5219 while (w && (unsigned long)dst & 15)
5220 {
5221 s = 0xff000000 | *src++;
5222 m = (uint32_t) *mask++;
5223 d = *dst;
5224 ms = unpack_32_1x64 (s);
5225
5226 if (m != 0xff)
5227 {
5228 __m64 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
5229 __m64 md = unpack_32_1x64 (d);
5230
5231 ms = in_over_1x64 (&ms, &mask_x00ff, &ma, &md);
5232 }
5233
5234 *dst++ = pack_1x64_32 (ms);
5235 w--;
5236 }
5237
5238 while (w >= 4)
5239 {
5240 m = *(uint32_t*) mask;
5241 xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
5242
5243 if (m == 0xffffffff)
5244 {
5245 save_128_aligned ((__m128i*)dst, xmm_src);
5246 }
5247 else
5248 {
5249 xmm_dst = load_128_aligned ((__m128i*)dst);
5250
5251 xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
5252
5253 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5254 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5255 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5256
5257 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5258
5259 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &mask_00ff, &mask_00ff, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5260
5261 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5262 }
5263
5264 src += 4;
5265 dst += 4;
5266 mask += 4;
5267 w -= 4;
5268 }
5269
5270 while (w)
5271 {
5272 m = (uint32_t) *mask++;
5273
5274 if (m)
5275 {
5276 s = 0xff000000 | *src;
5277
5278 if (m == 0xff)
5279 {
5280 *dst = s;
5281 }
5282 else
5283 {
5284 __m64 ma, md, ms;
5285
5286 d = *dst;
5287
5288 ma = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
5289 md = unpack_32_1x64 (d);
5290 ms = unpack_32_1x64 (s);
5291
5292 *dst = pack_1x64_32 (in_over_1x64 (&ms, &mask_x00ff, &ma, &md));
5293 }
5294
5295 }
5296
5297 src++;
5298 dst++;
5299 w--;
5300 }
5301 }
5302
5303 _mm_empty ();
5304}
5305
5306static void
5307sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
5308 pixman_op_t op,
5309 pixman_image_t * src_image,
5310 pixman_image_t * mask_image,
5311 pixman_image_t * dst_image,
5312 int32_t src_x,
5313 int32_t src_y,
5314 int32_t mask_x,
5315 int32_t mask_y,
5316 int32_t dest_x,
5317 int32_t dest_y,
5318 int32_t width,
5319 int32_t height)
5320{
5321 uint32_t *src, *src_line, s;
5322 uint32_t *dst, *dst_line, d;
5323 uint8_t *mask, *mask_line;
5324 uint32_t m;
5325 int src_stride, mask_stride, dst_stride;
5326 int32_t w;
5327
5328 __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
5329 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5330 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5331
5332    PIXMAN_IMAGE_GET_LINE (
5333        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
5334    PIXMAN_IMAGE_GET_LINE (
5335        mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
5336    PIXMAN_IMAGE_GET_LINE (
5337        src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
5338
5339 while (height--)
5340 {
5341 src = src_line;
5342 src_line += src_stride;
5343 dst = dst_line;
5344 dst_line += dst_stride;
5345 mask = mask_line;
5346 mask_line += mask_stride;
5347
5348 w = width;
5349
5350 while (w && (unsigned long)dst & 15)
5351 {
5352 uint32_t sa;
5353
5354 s = *src++;
5355 m = (uint32_t) *mask++;
5356 d = *dst;
5357
5358 sa = s >> 24;
5359
5360 if (m)
5361 {
5362 if (sa == 0xff && m == 0xff)
5363 {
5364 *dst = s;
5365 }
5366 else
5367 {
5368 __m64 ms, md, ma, msa;
5369
5370 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5371 ms = unpack_32_1x64 (s);
5372 md = unpack_32_1x64 (d);
5373
5374 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5375
5376 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5377 }
5378 }
5379
5380 dst++;
5381 w--;
5382 }
5383
5384 while (w >= 4)
5385 {
5386 m = *(uint32_t *) mask;
5387
5388 if (m)
5389 {
5390 xmm_src = load_128_unaligned ((__m128i*)src);
5391
5392 if (m == 0xffffffff && is_opaque (xmm_src))
5393 {
5394 save_128_aligned ((__m128i *)dst, xmm_src);
5395 }
5396 else
5397 {
5398 xmm_dst = load_128_aligned ((__m128i *)dst);
5399
5400 xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
5401
5402 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5403 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5404 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5405
5406 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
5407 expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5408
5409 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
5410 &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5411
5412 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5413 }
5414 }
5415
5416 src += 4;
5417 dst += 4;
5418 mask += 4;
5419 w -= 4;
5420 }
5421
5422 while (w)
5423 {
5424 uint32_t sa;
5425
5426 s = *src++;
5427 m = (uint32_t) *mask++;
5428 d = *dst;
5429
5430 sa = s >> 24;
5431
5432 if (m)
5433 {
5434 if (sa == 0xff && m == 0xff)
5435 {
5436 *dst = s;
5437 }
5438 else
5439 {
5440 __m64 ms, md, ma, msa;
5441
5442 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5443 ms = unpack_32_1x64 (s);
5444 md = unpack_32_1x64 (d);
5445
5446 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5447
5448 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5449 }
5450 }
5451
5452 dst++;
5453 w--;
5454 }
5455 }
5456
5457 _mm_empty ();
5458}
5459
5460static void
5461sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
5462 pixman_op_t op,
5463 pixman_image_t * src_image,
5464 pixman_image_t * mask_image,
5465 pixman_image_t * dst_image,
5466 int32_t src_x,
5467 int32_t src_y,
5468 int32_t mask_x,
5469 int32_t mask_y,
5470 int32_t dest_x,
5471 int32_t dest_y,
5472 int32_t width,
5473 int32_t height)
5474{
5475 uint32_t src;
5476 uint32_t *dst_line, *dst;
5477 __m128i xmm_src;
5478 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5479 __m128i xmm_dsta_hi, xmm_dsta_lo;
5480 int dst_stride;
5481 int32_t w;
5482
5483 src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
5484
5485 if (src == 0)
5486 return;
5487
5488    PIXMAN_IMAGE_GET_LINE (
5489        dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
5490
5491 xmm_src = expand_pixel_32_1x128 (src);
5492
5493 while (height--)
5494 {
5495 dst = dst_line;
5496
5497 dst_line += dst_stride;
5498 w = width;
5499
5500 while (w && (unsigned long)dst & 15)
5501 {
5502 __m64 vd;
5503
5504 vd = unpack_32_1x64 (*dst);
5505
5506 *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
5507 _mm_movepi64_pi64 (xmm_src)));
5508 w--;
5509 dst++;
5510 }
5511
5512 while (w >= 4)
5513 {
5514 __m128i tmp_lo, tmp_hi;
5515
5516 xmm_dst = load_128_aligned ((__m128i*)dst);
5517
5518 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5519 expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dsta_lo, &xmm_dsta_hi);
5520
5521 tmp_lo = xmm_src;
5522 tmp_hi = xmm_src;
5523
5524 over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
5525 &xmm_dsta_lo, &xmm_dsta_hi,
5526 &tmp_lo, &tmp_hi);
5527
5528 save_128_aligned (
5529 (__m128i*)dst, pack_2x128_128 (tmp_lo, tmp_hi));
5530
5531 w -= 4;
5532 dst += 4;
5533 }
5534
5535 while (w)
5536 {
5537 __m64 vd;
5538
5539 vd = unpack_32_1x64 (*dst);
5540
5541 *dst = pack_1x64_32 (over_1x64 (vd, expand_alpha_1x64 (vd),
5542 _mm_movepi64_pi64 (xmm_src)));
5543 w--;
5544 dst++;
5545 }
5546
5547 }
5548
5549 _mm_empty ();
5550}
5551
5552static void
5553sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
5554 pixman_op_t op,
5555 pixman_image_t * src_image,
5556 pixman_image_t * mask_image,
5557 pixman_image_t * dst_image,
5558 int32_t src_x,
5559 int32_t src_y,
5560 int32_t mask_x,
5561 int32_t mask_y,
5562 int32_t dest_x,
5563 int32_t dest_y,
5564 int32_t width,
5565 int32_t height)
5566{
5567 uint32_t *src, *src_line, s;
5568 uint32_t *dst, *dst_line, d;
5569 uint32_t *mask, *mask_line;
5570 uint32_t m;
5571 int src_stride, mask_stride, dst_stride;
5572 int32_t w;
5573
5574 __m128i xmm_src, xmm_src_lo, xmm_src_hi, xmm_srca_lo, xmm_srca_hi;
5575 __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
5576 __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
5577
5578 PIXMAN_IMAGE_GET_LINE (
5579 dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
5580 PIXMAN_IMAGE_GET_LINE (
5581 mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
5582 PIXMAN_IMAGE_GET_LINE (
5583 src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
5584
5585 while (height--)
5586 {
5587 src = src_line;
5588 src_line += src_stride;
5589 dst = dst_line;
5590 dst_line += dst_stride;
5591 mask = mask_line;
5592 mask_line += mask_stride;
5593
5594 w = width;
5595
5596 while (w && (unsigned long)dst & 15)
5597 {
5598 uint32_t sa;
5599
5600 s = *src++;
5601 m = (*mask++) >> 24;
5602 d = *dst;
5603
5604 sa = s >> 24;
5605
5606 if (m)
5607 {
5608 if (sa == 0xff && m == 0xff)
5609 {
5610 *dst = s;
5611 }
5612 else
5613 {
5614 __m64 ms, md, ma, msa;
5615
5616 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5617 ms = unpack_32_1x64 (s);
5618 md = unpack_32_1x64 (d);
5619
5620 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5621
5622 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5623 }
5624 }
5625
5626 dst++;
5627 w--;
5628 }
5629
5630 while (w >= 4)
5631 {
5632 xmm_mask = load_128_unaligned ((__m128i*)mask);
5633
5634 if (!is_transparent (xmm_mask))
5635 {
5636 xmm_src = load_128_unaligned ((__m128i*)src);
5637
5638 if (is_opaque (xmm_mask) && is_opaque (xmm_src))
5639 {
5640 save_128_aligned ((__m128i *)dst, xmm_src);
5641 }
5642 else
5643 {
5644 xmm_dst = load_128_aligned ((__m128i *)dst);
5645
5646 unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
5647 unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
5648 unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
5649
5650 expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi);
5651 expand_alpha_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
5652
5653 in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_srca_lo, &xmm_srca_hi,
5654 &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
5655
5656 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5657 }
5658 }
5659
5660 src += 4;
5661 dst += 4;
5662 mask += 4;
5663 w -= 4;
5664 }
5665
5666 while (w)
5667 {
5668 uint32_t sa;
5669
5670 s = *src++;
5671 m = (*mask++) >> 24;
5672 d = *dst;
5673
5674 sa = s >> 24;
5675
5676 if (m)
5677 {
5678 if (sa == 0xff && m == 0xff)
5679 {
5680 *dst = s;
5681 }
5682 else
5683 {
5684 __m64 ms, md, ma, msa;
5685
5686 ma = expand_alpha_rev_1x64 (load_32_1x64 (m));
5687 ms = unpack_32_1x64 (s);
5688 md = unpack_32_1x64 (d);
5689
5690 msa = expand_alpha_rev_1x64 (load_32_1x64 (sa));
5691
5692 *dst = pack_1x64_32 (in_over_1x64 (&ms, &msa, &ma, &md));
5693 }
5694 }
5695
5696 dst++;
5697 w--;
5698 }
5699 }
5700
5701 _mm_empty ();
5702}
5703
5704/* A variant of 'core_combine_over_u_sse2' with minor tweaks */
5705static force_inline void
5706scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
5707 const uint32_t* ps,
5708 int32_t w,
5709 pixman_fixed_t vx,
5710 pixman_fixed_t unit_x,
5711 pixman_fixed_t max_vx)
5712{
5713 uint32_t s, d;
5714 const uint32_t* pm = NULL;
5715
5716 __m128i xmm_dst_lo, xmm_dst_hi;
5717 __m128i xmm_src_lo, xmm_src_hi;
5718 __m128i xmm_alpha_lo, xmm_alpha_hi;
5719
5720 /* Align dst on a 16-byte boundary */
5721 while (w && ((unsigned long)pd & 15))
5722 {
5723 d = *pd;
5724 s = combine1 (ps + (vx >> 16), pm);
5725 vx += unit_x;
5726
5727 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
5728 if (pm)
5729 pm++;
5730 w--;
5731 }
5732
5733 while (w >= 4)
5734 {
5735 __m128i tmp;
5736 uint32_t tmp1, tmp2, tmp3, tmp4;
5737
5738 tmp1 = ps[vx >> 16];
5739 vx += unit_x;
5740 tmp2 = ps[vx >> 16];
5741 vx += unit_x;
5742 tmp3 = ps[vx >> 16];
5743 vx += unit_x;
5744 tmp4 = ps[vx >> 16];
5745 vx += unit_x;
5746
5747 tmp = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
5748
5749 xmm_src_hi = combine4 ((__m128i*)&tmp, (__m128i*)pm);
5750
5751 if (is_opaque (xmm_src_hi))
5752 {
5753 save_128_aligned ((__m128i*)pd, xmm_src_hi);
5754 }
5755 else if (!is_zero (xmm_src_hi))
5756 {
5757 xmm_dst_hi = load_128_aligned ((__m128i*) pd);
5758
5759 unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
5760 unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
5761
5762 expand_alpha_2x128 (
5763 xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
5764
5765 over_2x128 (&xmm_src_lo, &xmm_src_hi,
5766 &xmm_alpha_lo, &xmm_alpha_hi,
5767 &xmm_dst_lo, &xmm_dst_hi);
5768
5769 /* rebuild the 4 pixel data and save */
5770 save_128_aligned ((__m128i*)pd,
5771 pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
5772 }
5773
5774 w -= 4;
5775 pd += 4;
5776 if (pm)
5777 pm += 4;
5778 }
5779
5780 while (w)
5781 {
5782 d = *pd;
5783 s = combine1 (ps + (vx >> 16), pm);
5784 vx += unit_x;
5785
5786 *pd++ = core_combine_over_u_pixel_sse2 (s, d);
5787 if (pm)
5788 pm++;
5789
5790 w--;
5791 }
5792 _mm_empty ();
5793}
5794
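The scanline routine above walks the source with a 16.16 fixed-point coordinate: the integer part (vx >> 16) selects the nearest source pixel and unit_x is added per destination pixel. A minimal scalar sketch of just that coordinate stepping, with hypothetical names (no blending, unlike the OVER routine above):

    #include <stdint.h>

    typedef int32_t fixed_16_16;   /* 16.16 fixed point, like pixman_fixed_t */

    /* Fetch 'w' nearest-neighbour samples from 'src', starting at 'vx' and
     * advancing by 'unit_x' per destination pixel. */
    static void
    nearest_fetch_scanline (uint32_t *dst, const uint32_t *src,
                            int w, fixed_16_16 vx, fixed_16_16 unit_x)
    {
        while (w--)
        {
            *dst++ = src[vx >> 16];   /* integer part picks the source pixel */
            vx += unit_x;
        }
    }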
5795FAST_NEAREST_MAINLOOP (sse2_8888_8888_cover_OVER,
5796 scaled_nearest_scanline_sse2_8888_8888_OVER,
5797 uint32_t, uint32_t, COVER)
5798FAST_NEAREST_MAINLOOP (sse2_8888_8888_none_OVER,
5799 scaled_nearest_scanline_sse2_8888_8888_OVER,
5800 uint32_t, uint32_t, NONE)
5801FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
Within the expansion of the macro 'FAST_NEAREST_MAINLOOP':
Assigned value is garbage or undefined
(The flagged statement is the self-initializing declaration
"pixman_fixed_t max_vx = max_vx;" produced by the expansion. max_vx is only
assigned a defined value in the branch taken for PIXMAN_REPEAT_NORMAL; in this
PAD instantiation that branch can never be taken, so the initializer reads an
indeterminate value.)
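For illustration only, a minimal stand-alone C snippet (not pixman code; the variable name merely mirrors the macro's max_vx) showing the self-initialization pattern that produces this diagnostic:

    /* The initializer reads 'max_vx' before it has ever been assigned, so the
     * value stored is indeterminate.  The idiom is sometimes used to silence
     * "may be used uninitialized" compiler warnings, but the Clang static
     * analyzer reports it as "Assigned value is garbage or undefined".
     */
    static int
    self_init_example (void)
    {
        int max_vx = max_vx;
        return max_vx;
    }

A conventional fix is to give the variable a defined initial value (for example 0) or to assign it on every path before it is used.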
5802 scaled_nearest_scanline_sse2_8888_8888_OVER,
5803 uint32_t, uint32_t, PAD)
5804
5805static const pixman_fast_path_t sse2_fast_paths[] =
5806{
5807 /* PIXMAN_OP_OVER */
5808 PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, sse2_composite_over_n_8_0565),
5809 PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, sse2_composite_over_n_8_0565),
5810 PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, sse2_composite_over_n_8888),
5811 PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, sse2_composite_over_n_8888),
5812 PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, sse2_composite_over_n_0565),
5813 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, sse2_composite_over_8888_8888),
5814 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, sse2_composite_over_8888_8888),
5815 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, sse2_composite_over_8888_8888),
5816 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, sse2_composite_over_8888_8888),
5817 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, sse2_composite_over_8888_0565),
5818 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, sse2_composite_over_8888_0565),
5819 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, sse2_composite_over_n_8_8888),
5820 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, sse2_composite_over_n_8_8888),
5821 PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, sse2_composite_over_n_8_8888),
5822 PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, sse2_composite_over_n_8_8888),
5823 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, sse2_composite_over_8888_8888_8888),
5824 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, x8r8g8b8, sse2_composite_over_8888_8_8888),
5825 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, a8r8g8b8, sse2_composite_over_8888_8_8888),
5826 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, sse2_composite_over_8888_8_8888),
5827 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, a8b8g8r8, sse2_composite_over_8888_8_8888),
5828 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, sse2_composite_over_x888_8_8888),
5829 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, sse2_composite_over_x888_8_8888),
5830 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, sse2_composite_over_x888_8_8888),
5831 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, sse2_composite_over_x888_8_8888),
5832 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, a8r8g8b8, sse2_composite_over_x888_n_8888),
5833 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, solid, x8r8g8b8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_x888_n_8888 }
,
5834 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, a8b8g8r8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_x888_n_8888 }
,
5835 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, solid, x8b8g8r8, sse2_composite_over_x888_n_8888){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_x888_n_8888 }
,
5836 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_n_8888 }
,
5837 PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_n_8888 }
,
5838 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_n_8888 }
,
5839 PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, sse2_composite_over_8888_n_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((1) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_over_8888_n_8888 }
,
5840 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 8))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca
}
,
5841 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 8))), PIXMAN_x8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca
}
,
5842 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 == (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 8))), PIXMAN_a8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca
}
,
5843 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, sse2_composite_over_n_8888_8888_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 == (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 8))), PIXMAN_x8b8g8r8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_8888_ca
}
,
5844 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, sse2_composite_over_n_8888_0565_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 8))), PIXMAN_r5g6b5, (
(1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_0565_ca
}
,
5845 PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, sse2_composite_over_n_8888_0565_ca){ PIXMAN_OP_OVER, (((0) << 24) | ((1) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8b8g8r8, ((PIXMAN_a8b8g8r8 == (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 8))), PIXMAN_b5g6r5, (
(1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_n_8888_0565_ca
}
,
5846 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, a8r8g8b8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((2) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((2) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
5847 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, x8r8g8b8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((2) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((2) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_x8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
5848 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, a8b8g8r8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((3) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((3) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
5849 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, x8b8g8r8, sse2_composite_over_pixbuf_8888){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((3) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((3) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_x8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_8888
}
,
5850 PIXMAN_STD_FAST_PATH (OVER, pixbuf, pixbuf, r5g6b5, sse2_composite_over_pixbuf_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((2) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((2) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((2) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((2) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_r5g6b5
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_0565
}
,
5851 PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565){ PIXMAN_OP_OVER, (((0) << 24) | ((3) << 16) | ((
0) << 12) | ((0) << 8) | ((0) << 4) | ((0))
), (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((3) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((3) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((3) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_b5g6r5
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_over_pixbuf_0565
}
,
5852 PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5853 PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5854
5855 /* PIXMAN_OP_OVER_REVERSE */
5856 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888){ PIXMAN_OP_OVER_REVERSE, (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((1 << 2) | (1 << 5) | (1 << 1) |
(1 << 6)) | (((((0) << 24) | ((1) << 16) |
((0) << 12) | ((0) << 8) | ((0) << 4) | ((
0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))), (((0) << 24) | ((0)
<< 16) | ((0) << 12) | ((0) << 8) | ((0) <<
4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0)
<< 12) | ((0) << 8) | ((0) << 4) | ((0))) ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((
((0) << 24) | ((0) << 16) | ((0) << 12) | (
(0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_reverse_n_8888
}
,
5857 PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888){ PIXMAN_OP_OVER_REVERSE, (((0) << 24) | ((1) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((1 << 2) | (1 << 5) | (1 << 1) |
(1 << 6)) | (((((0) << 24) | ((1) << 16) |
((0) << 12) | ((0) << 8) | ((0) << 4) | ((
0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))), (((0) << 24) | ((0)
<< 16) | ((0) << 12) | ((0) << 8) | ((0) <<
4) | ((0))), (((((0) << 24) | ((0) << 16) | ((0)
<< 12) | ((0) << 8) | ((0) << 4) | ((0))) ==
(((0) << 24) | ((0) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((((1 <<
2) | (1 << 5) | (1 << 1) | (1 << 6)) | (((
((0) << 24) | ((0) << 16) | ((0) << 12) | (
(0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) |
(1 << 1) | (1 << 6)), sse2_composite_over_reverse_n_8888
}
,
5858
5859 /* PIXMAN_OP_ADD */
5860 PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, sse2_composite_add_n_8888_8888_ca){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0
) << 12) | ((0) << 8) | ((0) << 4) | ((0)))
, (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8r8g8b8, ((PIXMAN_a8r8g8b8 == (((0) << 24
) | ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))) | (1 << 8))), PIXMAN_a8r8g8b8,
((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8888_8888_ca
}
,
5861 PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, sse2_composite_add_8_8){ PIXMAN_OP_ADD, PIXMAN_a8, (((1 << 2) | (1 << 5)
| (1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_8_8
}
,
5862 PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888){ PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_add_8888_8888 }
,
5863 PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888){ PIXMAN_OP_ADD, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_add_8888_8888 }
,
5864 PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0
) << 12) | ((0) << 8) | ((0) << 4) | ((0)))
, (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 <<
1) | (1 << 6)), sse2_composite_add_n_8_8 }
,
5865 PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8){ PIXMAN_OP_ADD, (((0) << 24) | ((1) << 16) | ((0
) << 12) | ((0) << 8) | ((0) << 4) | ((0)))
, (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_add_n_8
}
,
5866
5867 /* PIXMAN_OP_SRC */
5868 PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0
) << 12) | ((0) << 8) | ((0) << 4) | ((0)))
, (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_a8r8g8b8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }
,
5869 PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0
) << 12) | ((0) << 8) | ((0) << 4) | ((0)))
, (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_x8r8g8b8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }
,
5870 PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0
) << 12) | ((0) << 8) | ((0) << 4) | ((0)))
, (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_a8b8g8r8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }
,
5871 PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888){ PIXMAN_OP_SRC, (((0) << 24) | ((1) << 16) | ((0
) << 12) | ((0) << 8) | ((0) << 4) | ((0)))
, (((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_x8b8g8r8, ((1 << 5) | (
1 << 1) | (1 << 6)), sse2_composite_src_n_8_8888 }
,
5872 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_src_x888_8888 }
,
5873 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_src_x888_8888 }
,
5874 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5875 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_a8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5876 PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5877 PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_a8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5878 PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8r8g8b8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8r8g8b8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5879 PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_x8b8g8r8 ==
(((0) << 24) | ((1) << 16) | ((0) << 12) |
((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_x8b8g8r8, ((1 << 5) | (1 << 1) | (1
<< 6)), sse2_composite_copy_area }
,
5880 PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_r5g6b5, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_r5g6b5 == (
((0) << 24) | ((1) << 16) | ((0) << 12) | (
(0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_r5g6b5, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_copy_area }
,
5881 PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, sse2_composite_copy_area){ PIXMAN_OP_SRC, PIXMAN_b5g6r5, (((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | ((PIXMAN_b5g6r5 == (
((0) << 24) | ((1) << 16) | ((0) << 12) | (
(0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((1 <<
16) | (1 << 0)))), (((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))), (((((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((((1 << 2) | (1 <<
5) | (1 << 1) | (1 << 6)) | (((((0) << 24)
| ((0) << 16) | ((0) << 12) | ((0) << 8) |
((0) << 4) | ((0))) == (((0) << 24) | ((1) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((1 << 16) | (1 << 0)))) | (1 <<
9))), PIXMAN_b5g6r5, ((1 << 5) | (1 << 1) | (1 <<
6)), sse2_composite_copy_area }
,
5882
5883 /* PIXMAN_OP_IN */
5884 PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8){ PIXMAN_OP_IN, PIXMAN_a8, (((1 << 2) | (1 << 5) |
(1 << 1) | (1 << 6)) | ((PIXMAN_a8 == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_in_8_8
}
,
5885 PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8){ PIXMAN_OP_IN, (((0) << 24) | ((1) << 16) | ((0)
<< 12) | ((0) << 8) | ((0) << 4) | ((0))),
(((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), PIXMAN_a8, ((PIXMAN_a8 == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | ((PIXMAN_a8 == (((0) << 24) | (
(1) << 16) | ((0) << 12) | ((0) << 8) | ((0
) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 << 0
)))) | (1 << 9))), PIXMAN_a8, ((1 << 5) | (1 <<
1) | (1 << 6)), sse2_composite_in_n_8_8 }
,
5886 PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8){ PIXMAN_OP_IN, (((0) << 24) | ((1) << 16) | ((0)
<< 12) | ((0) << 8) | ((0) << 4) | ((0))),
(((1 << 2) | (1 << 5) | (1 << 1) | (1 <<
6)) | (((((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))) == (((0) <<
24) | ((1) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0)))) ? 0 : ((1 << 16) | (1 <<
0)))), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), (((((0) <<
24) | ((0) << 16) | ((0) << 12) | ((0) << 8
) | ((0) << 4) | ((0))) == (((0) << 24) | ((0) <<
16) | ((0) << 12) | ((0) << 8) | ((0) << 4
) | ((0)))) ? 0 : ((((1 << 2) | (1 << 5) | (1 <<
1) | (1 << 6)) | (((((0) << 24) | ((0) << 16
) | ((0) << 12) | ((0) << 8) | ((0) << 4) |
((0))) == (((0) << 24) | ((1) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0)))) ? 0 : ((
1 << 16) | (1 << 0)))) | (1 << 9))), PIXMAN_a8
, ((1 << 5) | (1 << 1) | (1 << 6)), sse2_composite_in_n_8
}
,
5887
5888 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1
<< 16), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER
, }
,
5889 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1
<< 16), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER
, }
,
5890 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, ((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1
<< 16), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER
, }
,
5891 SIMPLE_NEAREST_FAST_PATH_COVER (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, ((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | (1
<< 16), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_cover_OVER
, }
,
5892 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 14) | (1 << 3) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER
, }
,
5893 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 14) | (1 << 3) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER
, }
,
5894 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 14) | (1 << 3) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER
, }
,
5895 SIMPLE_NEAREST_FAST_PATH_NONE (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 14) | (1 << 3) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_none_OVER
, }
,
5896 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 15) | (1 << 14) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER
, }
,
5897 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 15) | (1 << 14) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_x8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER
, }
,
5898 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 15) | (1 << 14) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8r8g8b8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER
, }
,
5899 SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888){ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, (((1 << 10) | (1 <<
1) | (1 << 11) | (1 << 5) | (1 << 6)) | ((
1 << 15) | (1 << 14) | (1 << 4)) | (1 <<
17)), (((0) << 24) | ((0) << 16) | ((0) <<
12) | ((0) << 8) | ((0) << 4) | ((0))), 0, PIXMAN_a8b8g8r8
, ((1 << 5) | (1 << 1) | (1 << 6)), fast_composite_scaled_nearest_sse2_8888_8888_pad_OVER
, }
,
5900
5901 { PIXMAN_OP_NONE },
5902};
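
Editor's note: each PIXMAN_STD_FAST_PATH / PIXMAN_STD_FAST_PATH_CA / SIMPLE_NEAREST_FAST_PATH_* invocation above expands, as shown, into one initializer of the sse2_fast_paths table: the operator, the source, mask and destination formats, the flag bitmask each of them must satisfy, and the composite routine to call; the { PIXMAN_OP_NONE } entry terminates the table. The sketch below is only illustrative — the type and function names (example_fast_path_t, example_lookup) are hypothetical, and pixman's real lookup code and FAST_PATH_* flag constants live elsewhere (pixman-private.h); the matching rule shown (required flags must be a subset of the computed flags) is the general idea, not a verbatim copy.

/* Illustrative sketch only -- not pixman's actual lookup routine. */
typedef struct
{
    int        op;                        /* e.g. PIXMAN_OP_OVER            */
    unsigned   src_format,  src_flags;    /* required source format/flags   */
    unsigned   mask_format, mask_flags;   /* required mask format/flags     */
    unsigned   dest_format, dest_flags;   /* required destination flags     */
    void     (*func) (void);              /* composite routine to invoke    */
} example_fast_path_t;

static const example_fast_path_t *
example_lookup (const example_fast_path_t *table,
                int op,
                unsigned src_format,  unsigned src_flags,
                unsigned mask_format, unsigned mask_flags,
                unsigned dest_format, unsigned dest_flags)
{
    const example_fast_path_t *p;

    /* Scan linearly; the terminating sentinel entry has no function. */
    for (p = table; p->func != 0; ++p)
    {
        if (p->op == op &&
            p->src_format  == src_format  &&
            p->mask_format == mask_format &&
            p->dest_format == dest_format &&
            (p->src_flags  & src_flags)  == p->src_flags  &&
            (p->mask_flags & mask_flags) == p->mask_flags &&
            (p->dest_flags & dest_flags) == p->dest_flags)
            return p;
    }
    return 0;   /* no fast path: fall back to a more general implementation */
}
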
5903
5904static pixman_bool_t
5905sse2_blt (pixman_implementation_t *imp,
5906 uint32_t * src_bits,
5907 uint32_t * dst_bits,
5908 int src_stride,
5909 int dst_stride,
5910 int src_bpp,
5911 int dst_bpp,
5912 int src_x,
5913 int src_y,
5914 int dst_x,
5915 int dst_y,
5916 int width,
5917 int height)
5918{
5919 if (!pixman_blt_sse2 (
5920 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
5921 src_x, src_y, dst_x, dst_y, width, height))
5922
5923 {
5924 return _pixman_implementation_blt (
5925 imp->delegate,
5926 src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
5927 src_x, src_y, dst_x, dst_y, width, height);
5928 }
5929
5930 return TRUE1;
5931}
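
sse2_blt() first tries the SSE2 copy; when pixman_blt_sse2() declines (returns FALSE), the unchanged arguments are forwarded to the delegate implementation and the delegate's verdict is returned instead. The sketch below shows this delegation pattern with hypothetical names (example_impl_t, try_fast_op) and with the recursion collapsed into a loop for brevity; it is not pixman's actual dispatch code.

/* Minimal sketch of the delegate pattern (hypothetical types and names). */
typedef struct example_impl example_impl_t;
struct example_impl
{
    example_impl_t *delegate;                   /* next, more general tier   */
    int            (*try_fast_op) (void *args); /* returns 0 when it declines */
};

static int
example_dispatch (example_impl_t *imp, void *args)
{
    /* Try the most specialised tier first, then walk down the chain. */
    for (; imp != 0; imp = imp->delegate)
    {
        if (imp->try_fast_op && imp->try_fast_op (args))
            return 1;
    }
    return 0;   /* no tier could handle the request */
}
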
5932
5933#if defined(__GNUC__4) && !defined(__x86_64__1) && !defined(__amd64__1)
5934__attribute__((__force_align_arg_pointer__))
5935#endif
5936static pixman_bool_t
5937sse2_fill (pixman_implementation_t *imp,
5938 uint32_t * bits,
5939 int stride,
5940 int bpp,
5941 int x,
5942 int y,
5943 int width,
5944 int height,
5945 uint32_t xor)
5946{
5947 if (!pixman_fill_sse2 (bits, stride, bpp, x, y, width, height, xor))
5948 {
5949 return _pixman_implementation_fill (
5950 imp->delegate, bits, stride, bpp, x, y, width, height, xor);
5951 }
5952
5953 return TRUE1;
5954}
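
sse2_fill() follows the same pattern: pixman_fill_sse2() handles the cases it supports, and anything else falls through to the delegate. From the public API this chain is reached through pixman_fill(); the small usage sketch below assumes the usual pixman.h declaration and that the stride argument is counted in uint32_t units.

#include <stdint.h>
#include <stdlib.h>
#include <pixman.h>

int
main (void)
{
    int       width  = 64, height = 64;
    int       stride = width;                        /* in uint32_t units */
    uint32_t *bits   = calloc (stride * height, sizeof (uint32_t));

    if (!bits)
        return 1;

    /* Fill a 32x32 region at (8,8) with opaque red (a8r8g8b8, bpp = 32).
     * With SSE2 available this request ends up in sse2_fill() above.    */
    pixman_fill (bits, stride, 32, 8, 8, 32, 32, 0xffff0000);

    free (bits);
    return 0;
}
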
5955
5956#if defined(__GNUC__4) && !defined(__x86_64__1) && !defined(__amd64__1)
5957__attribute__((__force_align_arg_pointer__))
5958#endif
5959pixman_implementation_t *
5960_pixman_implementation_create_sse2 (void)
5961{
5962#ifdef USE_MMX1
5963 pixman_implementation_t *fallback = _pixman_implementation_create_mmx ();
5964#else
5965 pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
5966#endif
5967 pixman_implementation_t *imp = _pixman_implementation_create (fallback, sse2_fast_paths);
5968
5969 /* SSE2 constants */
5970 mask_565_r = create_mask_2x32_128 (0x00f80000, 0x00f80000);
5971 mask_565_g1 = create_mask_2x32_128 (0x00070000, 0x00070000);
5972 mask_565_g2 = create_mask_2x32_128 (0x000000e0, 0x000000e0);
5973 mask_565_b = create_mask_2x32_128 (0x0000001f, 0x0000001f);
5974 mask_red = create_mask_2x32_128 (0x00f80000, 0x00f80000);
5975 mask_green = create_mask_2x32_128 (0x0000fc00, 0x0000fc00);
5976 mask_blue = create_mask_2x32_128 (0x000000f8, 0x000000f8);
5977 mask_565_fix_rb = create_mask_2x32_128 (0x00e000e0, 0x00e000e0);
5978 mask_565_fix_g = create_mask_2x32_128 (0x0000c000, 0x0000c000);
5979 mask_0080 = create_mask_16_128 (0x0080);
5980 mask_00ff = create_mask_16_128 (0x00ff);
5981 mask_0101 = create_mask_16_128 (0x0101);
5982 mask_ffff = create_mask_16_128 (0xffff);
5983 mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
5984 mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
5985
5986 /* MMX constants */
5987 mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f);
5988 mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840);
5989
5990 mask_x0080 = create_mask_16_64 (0x0080);
5991 mask_x00ff = create_mask_16_64 (0x00ff);
5992 mask_x0101 = create_mask_16_64 (0x0101);
5993 mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000);
5994
5995 _mm_empty ();
5996
5997 /* Set up function pointers */
5998
5999 /* SSE code patch for fbcompose.c */
6000 imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
6001 imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
6002 imp->combine_32[PIXMAN_OP_IN] = sse2_combine_in_u;
6003 imp->combine_32[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_u;
6004 imp->combine_32[PIXMAN_OP_OUT] = sse2_combine_out_u;
6005 imp->combine_32[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_u;
6006 imp->combine_32[PIXMAN_OP_ATOP] = sse2_combine_atop_u;
6007 imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
6008 imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
6009 imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
6010
6011 imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
6012
6013 imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
6014 imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
6015 imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
6016 imp->combine_32_ca[PIXMAN_OP_IN] = sse2_combine_in_ca;
6017 imp->combine_32_ca[PIXMAN_OP_IN_REVERSE] = sse2_combine_in_reverse_ca;
6018 imp->combine_32_ca[PIXMAN_OP_OUT] = sse2_combine_out_ca;
6019 imp->combine_32_ca[PIXMAN_OP_OUT_REVERSE] = sse2_combine_out_reverse_ca;
6020 imp->combine_32_ca[PIXMAN_OP_ATOP] = sse2_combine_atop_ca;
6021 imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
6022 imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
6023 imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
6024
6025 imp->blt = sse2_blt;
6026 imp->fill = sse2_fill;
6027
6028 return imp;
6029}
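
_pixman_implementation_create_sse2() stacks the SSE2 tier on top of either the MMX tier or the generic fast-path tier, initialises the constant masks used throughout this file, and overrides the per-operator combine_32 / combine_32_ca pointers as well as blt and fill before handing back the new implementation. Callers are expected to invoke it only after confirming SSE2 support at run time. The fragment below is a hedged sketch of such a selection step: have_sse2() is a hypothetical CPUID-based check, and this is not pixman's actual chooser, which also considers MMX and other architectures.

#include "pixman-private.h"

extern int have_sse2 (void);   /* hypothetical CPUID-based capability check */

pixman_implementation_t *
example_choose_implementation (void)
{
#ifdef USE_SSE2
    if (have_sse2 ())
        return _pixman_implementation_create_sse2 ();
#endif
    return _pixman_implementation_create_fast_path ();
}
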
6030
6031#endif /* USE_SSE2 */