Clover Git
OpenCL 1.1 software implementation
|
00001 /* 00002 * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr> 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions are met: 00007 * * Redistributions of source code must retain the above copyright 00008 * notice, this list of conditions and the following disclaimer. 00009 * * Redistributions in binary form must reproduce the above copyright 00010 * notice, this list of conditions and the following disclaimer in the 00011 * documentation and/or other materials provided with the distribution. 00012 * * Neither the name of the copyright holder nor the 00013 * names of its contributors may be used to endorse or promote products 00014 * derived from this software without specific prior written permission. 00015 * 00016 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 00017 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00018 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00019 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 00020 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00021 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00022 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00023 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00024 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00025 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00026 */ 00027 00037 #include "../memobject.h" 00038 #include "../sampler.h" 00039 #include "kernel.h" 00040 #include "buffer.h" 00041 #include "builtins.h" 00042 00043 #include <cstdlib> 00044 #include <cmath> 00045 #include <immintrin.h> 00046 00047 using namespace Coal; 00048 00049 /* 00050 * Helper functions 00051 */ 00052 00053 static int clamp(int a, int b, int c) 00054 { 00055 return (a < b) ? b : ((a > c) ? c : a); 00056 } 00057 00058 static int min(int a, int b) 00059 { 00060 return (a < b ? a : b); 00061 } 00062 00063 static int max(int a, int b) 00064 { 00065 return (a > b ? a : b); 00066 } 00067 00068 static float frac(float x) 00069 { 00070 return x - std::floor(x); 00071 } 00072 00073 static float round(float x) 00074 { 00075 return (float)(int)x; 00076 } 00077 00078 static bool handle_address_mode(Image2D *image, int &x, int &y, int &z, 00079 uint32_t sampler) 00080 { 00081 bool is_3d = (image->type() == MemObject::Image3D); 00082 int w = image->width(), 00083 h = image->height(), 00084 d = (is_3d ? ((Image3D *)image)->depth() : 1); 00085 00086 if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP_TO_EDGE) 00087 { 00088 x = clamp(x, 0, w - 1); 00089 y = clamp(y, 0, h - 1); 00090 if (is_3d) z = clamp(z, 0, d - 1); 00091 } 00092 else if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP) 00093 { 00094 x = clamp(x, 0, w); 00095 y = clamp(y, 0, h); 00096 if (is_3d) z = clamp(z, 0, d); 00097 } 00098 00099 return (x == w || y == h || z == d); 00100 } 00101 00102 /* 00103 * Macros or functions used to accelerate the functions 00104 */ 00105 #ifndef __has_builtin 00106 #define __has_builtin(x) 0 00107 #endif 00108 00109 static void slow_shuffle4(uint32_t *rs, uint32_t *a, uint32_t *b, 00110 int x, int y, int z, int w) 00111 { 00112 rs[0] = (x < 4 ? a[x] : b[x - 4]); 00113 rs[1] = (y < 4 ? a[y] : b[y - 4]); 00114 rs[2] = (z < 4 ? a[z] : b[z - 4]); 00115 rs[3] = (w < 4 ? a[w] : b[w - 4]); 00116 } 00117 00118 static void convert_to_format(void *dest, float *data, 00119 cl_channel_type type, unsigned int channels) 00120 { 00121 // Convert always the four components of source to target 00122 if (type == CL_FLOAT) 00123 std::memcpy(dest, data, channels * sizeof(float)); 00124 00125 for (unsigned int i=0; i<channels; ++i) 00126 { 00127 switch (type) 00128 { 00129 case CL_SNORM_INT8: 00130 ((int8_t *)dest)[i] = data[i] * 128.0f; 00131 break; 00132 case CL_SNORM_INT16: 00133 ((int16_t *)dest)[i] = data[i] * 32767.0f; 00134 break; 00135 case CL_UNORM_INT8: 00136 ((uint8_t *)dest)[i] = data[i] * 255.0f; 00137 break; 00138 case CL_UNORM_INT16: 00139 ((uint16_t *)dest)[i] = data[i] * 65535.0f; 00140 break; 00141 } 00142 } 00143 } 00144 00145 static void convert_from_format(float *data, void *source, 00146 cl_channel_type type, unsigned int channels) 00147 { 00148 // Convert always the four components of source to target 00149 if (type == CL_FLOAT) 00150 std::memcpy(data, source, channels * sizeof(float)); 00151 00152 for (unsigned int i=0; i<channels; ++i) 00153 { 00154 switch (type) 00155 { 00156 case CL_SNORM_INT8: 00157 data[i] = (float)((int8_t *)source)[i] / 127.0f; 00158 break; 00159 case CL_SNORM_INT16: 00160 data[i] = (float)((int16_t *)source)[i] / 32767.0f; 00161 break; 00162 case CL_UNORM_INT8: 00163 data[i] = (float)((uint8_t *)source)[i] / 127.0f; 00164 break; 00165 case CL_UNORM_INT16: 00166 data[i] = (float)((uint16_t *)source)[i] / 127.0f; 00167 break; 00168 } 00169 } 00170 } 00171 00172 static void convert_to_format(void *dest, int *data, 00173 cl_channel_type type, unsigned int channels) 00174 { 00175 // Convert always the four components of source to target 00176 if (type == CL_SIGNED_INT32) 00177 std::memcpy(dest, data, channels * sizeof(int32_t)); 00178 00179 for (unsigned int i=0; i<channels; ++i) 00180 { 00181 switch (type) 00182 { 00183 case CL_SIGNED_INT8: 00184 ((int8_t *)dest)[i] = data[i]; 00185 break; 00186 case CL_SIGNED_INT16: 00187 ((int16_t *)dest)[i] = data[i]; 00188 break; 00189 } 00190 } 00191 } 00192 00193 static void convert_from_format(int32_t *data, void *source, 00194 cl_channel_type type, unsigned int channels) 00195 { 00196 // Convert always the four components of source to target 00197 if (type == CL_SIGNED_INT32) 00198 std::memcpy(data, source, channels * sizeof(int32_t)); 00199 00200 for (unsigned int i=0; i<channels; ++i) 00201 { 00202 switch (type) 00203 { 00204 case CL_SIGNED_INT8: 00205 data[i] = ((int8_t *)source)[i]; 00206 break; 00207 case CL_SIGNED_INT16: 00208 data[i] = ((int16_t *)source)[i]; 00209 break; 00210 } 00211 } 00212 } 00213 00214 static void convert_to_format(void *dest, uint32_t *data, 00215 cl_channel_type type, unsigned int channels) 00216 { 00217 // Convert always the four components of source to target 00218 if (type == CL_UNSIGNED_INT32) 00219 std::memcpy(dest, data, channels * sizeof(uint32_t)); 00220 00221 for (unsigned int i=0; i<3; ++i) 00222 { 00223 switch (type) 00224 { 00225 case CL_UNSIGNED_INT8: 00226 ((uint8_t *)dest)[i] = data[i]; 00227 break; 00228 case CL_UNSIGNED_INT16: 00229 ((uint16_t *)dest)[i] = data[i]; 00230 break; 00231 } 00232 } 00233 } 00234 00235 static void convert_from_format(uint32_t *data, void *source, 00236 cl_channel_type type, unsigned int channels) 00237 { 00238 // Convert always the four components of source to target 00239 if (type == CL_UNSIGNED_INT32) 00240 std::memcpy(data, source, channels * sizeof(uint32_t)); 00241 00242 for (unsigned int i=0; i<channels; ++i) 00243 { 00244 switch (type) 00245 { 00246 case CL_UNSIGNED_INT8: 00247 data[i] = ((uint8_t *)source)[i]; 00248 break; 00249 case CL_UNSIGNED_INT16: 00250 data[i] = ((uint16_t *)source)[i]; 00251 break; 00252 } 00253 } 00254 } 00255 00256 template<typename T> 00257 static void vec4_scalar_mul(T *vec, float val) 00258 { 00259 for (unsigned int i=0; i<4; ++i) 00260 vec[i] *= val; 00261 } 00262 00263 template<typename T> 00264 static void vec4_add(T *vec1, T *vec2) 00265 { 00266 for (unsigned int i=0; i<4; ++i) 00267 vec1[i] += vec2[i]; 00268 } 00269 00270 template<typename T> 00271 void CPUKernelWorkGroup::linear3D(T *result, float a, float b, float c, 00272 int i0, int j0, int k0, int i1, int j1, int k1, 00273 Image3D *image) const 00274 { 00275 T accum[4]; 00276 00277 readImageImplI<T>(result, image, i0, j0, k0, 0); 00278 vec4_scalar_mul(result, (1.0f - a) * (1.0f - b) * (1.0f - c )); 00279 00280 readImageImplI<T>(accum, image, i1, j0, k0, 0); 00281 vec4_scalar_mul(accum, a * (1.0f - b) * (1.0f - c )); 00282 vec4_add(result, accum); 00283 00284 readImageImplI<T>(accum, image, i0, j1, k0, 0); 00285 vec4_scalar_mul(accum, (1.0f - a) * b * (1.0f - c )); 00286 vec4_add(result, accum); 00287 00288 readImageImplI<T>(accum, image, i1, j1, k0, 0); 00289 vec4_scalar_mul(accum, a * b * (1.0f -c )); 00290 vec4_add(result, accum); 00291 00292 readImageImplI<T>(accum, image, i0, j0, k1, 0); 00293 vec4_scalar_mul(accum, (1.0f - a) * (1.0f - b) * c); 00294 vec4_add(result, accum); 00295 00296 readImageImplI<T>(accum, image, i1, j0, k1, 0); 00297 vec4_scalar_mul(accum, a * (1.0f - b) * c); 00298 vec4_add(result, accum); 00299 00300 readImageImplI<T>(accum, image, i0, j1, k1, 0); 00301 vec4_scalar_mul(accum, (1.0f - a) * b * c); 00302 vec4_add(result, accum); 00303 00304 readImageImplI<T>(accum, image, i1, j1, k1, 0); 00305 vec4_scalar_mul(accum, a * b * c); 00306 vec4_add(result, accum); 00307 } 00308 00309 template<typename T> 00310 void CPUKernelWorkGroup::linear2D(T *result, float a, float b, float c, int i0, int j0, 00311 int i1, int j1, Image2D *image) const 00312 { 00313 T accum[4]; 00314 00315 readImageImplI<T>(result, image, i0, j0, 0, 0); 00316 vec4_scalar_mul(result, (1.0f - a) * (1.0f - b)); 00317 00318 readImageImplI<T>(accum, image, i1, j0, 0, 0); 00319 vec4_scalar_mul(accum, a * (1.0f - b)); 00320 vec4_add(result, accum); 00321 00322 readImageImplI<T>(accum, image, i0, j1, 0, 0); 00323 vec4_scalar_mul(accum, (1.0f - a) * b); 00324 vec4_add(result, accum); 00325 00326 readImageImplI<T>(accum, image, i1, j1, 0, 0); 00327 vec4_scalar_mul(accum, a * b); 00328 vec4_add(result, accum); 00329 } 00330 00331 #if __has_builtin(__builtin_shufflevector) 00332 #define shuffle4(rs, a, b, x, y, z, w) \ 00333 *(__v4sf *)rs = __builtin_shufflevector(*(__v4sf *)a, *(__v4sf *)b, \ 00334 x, y, z, w) 00335 #else 00336 #define shuffle4(rs, a, b, x, y, z, w) \ 00337 slow_shuffle4(rs, a, b, x, y, z, w) 00338 #endif 00339 00340 static void swizzle(uint32_t *target, uint32_t *source, 00341 cl_channel_order order, bool reading, uint32_t t_max) 00342 { 00343 uint32_t special[4] = {0, t_max, 0, 0 }; 00344 00345 if (reading) 00346 { 00347 switch (order) 00348 { 00349 case CL_R: 00350 case CL_Rx: 00351 // target = {source->x, 0, 0, t_max} 00352 shuffle4(target, source, special, 0, 4, 4, 5); 00353 break; 00354 case CL_A: 00355 // target = {0, 0, 0, source->x} 00356 shuffle4(target, source, special, 4, 4, 4, 0); 00357 break; 00358 case CL_INTENSITY: 00359 // target = {source->x, source->x, source->x, source->x} 00360 shuffle4(target, source, source, 0, 0, 0, 0); 00361 break; 00362 case CL_LUMINANCE: 00363 // target = {source->x, source->x, source->x, t_max} 00364 shuffle4(target, source, special, 0, 0, 0, 5); 00365 break; 00366 case CL_RG: 00367 case CL_RGx: 00368 // target = {source->x, source->y, 0, t_max} 00369 shuffle4(target, source, special, 0, 1, 4, 5); 00370 break; 00371 case CL_RA: 00372 // target = {source->x, 0, 0, source->y} 00373 shuffle4(target, source, special, 0, 4, 4, 1); 00374 break; 00375 case CL_RGB: 00376 case CL_RGBx: 00377 case CL_RGBA: 00378 // Nothing to do, already the good order 00379 std::memcpy(target, source, 16); 00380 break; 00381 case CL_ARGB: 00382 // target = {source->y, source->z, source->w, source->x} 00383 shuffle4(target, source, source, 1, 2, 3, 0); 00384 break; 00385 case CL_BGRA: 00386 // target = {source->z, source->y, source->x, source->w} 00387 shuffle4(target, source, source, 2, 1, 0, 3); 00388 break; 00389 } 00390 } 00391 else 00392 { 00393 switch (order) 00394 { 00395 case CL_A: 00396 // target = {source->w, undef, undef, undef} 00397 shuffle4(target, source, source, 3, 3, 3, 3); 00398 break; 00399 case CL_RA: 00400 // target = {source->x, source->w, undef, undef} 00401 shuffle4(target, source, source, 0, 3, 3, 3); 00402 break; 00403 case CL_ARGB: 00404 // target = {source->w, source->x, source->y, source->z} 00405 shuffle4(target, source, source, 3, 0, 1, 2); 00406 break; 00407 case CL_BGRA: 00408 // target = {source->z, source->y, source->x, source->w} 00409 shuffle4(target, source, source, 2, 1, 0, 3); 00410 break; 00411 default: 00412 std::memcpy(target, source, 16); 00413 } 00414 } 00415 } 00416 00417 /* 00418 * Actual implementation of the built-ins 00419 */ 00420 00421 void *CPUKernelWorkGroup::getImageData(Image2D *image, int x, int y, int z) const 00422 { 00423 CPUBuffer *buffer = 00424 (CPUBuffer *)image->deviceBuffer((DeviceInterface *)p_kernel->device()); 00425 00426 return imageData((unsigned char *)buffer->data(), 00427 x, y, z, 00428 image->row_pitch(), 00429 image->slice_pitch(), 00430 image->pixel_size()); 00431 } 00432 00433 template<typename T> 00434 void CPUKernelWorkGroup::writeImageImpl(Image2D *image, int x, int y, int z, 00435 T *color) const 00436 { 00437 T converted[4]; 00438 00439 // Swizzle to the correct order (float, int and uint are 32-bit, so the 00440 // type has no importance 00441 swizzle((uint32_t *)converted, (uint32_t *)color, 00442 image->format().image_channel_order, false, 0); 00443 00444 // Get a pointer in the image where to write the data 00445 void *target = getImageData(image, x, y, z); 00446 00447 // Convert color to the correct format 00448 convert_to_format(target, 00449 converted, 00450 image->format().image_channel_data_type, 00451 image->channels()); 00452 } 00453 00454 void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, 00455 float *color) const 00456 { 00457 writeImageImpl<float>(image, x, y, z, color); 00458 } 00459 00460 void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, 00461 int32_t *color) const 00462 { 00463 writeImageImpl<int32_t>(image, x, y, z, color); 00464 } 00465 00466 void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z, 00467 uint32_t *color) const 00468 { 00469 writeImageImpl<uint32_t>(image, x, y, z, color); 00470 } 00471 00472 template<typename T> 00473 uint32_t type_max_value() 00474 { 00475 return 0; 00476 } 00477 00478 template<> 00479 uint32_t type_max_value<float>() 00480 { 00481 return 1065353216; // 1.0f in decimal form 00482 } 00483 00484 template<> 00485 uint32_t type_max_value<int32_t>() 00486 { 00487 return 0x7fffffff; 00488 } 00489 00490 template<> 00491 uint32_t type_max_value<uint32_t>() 00492 { 00493 return 0xffffffff; 00494 } 00495 00496 template<typename T> 00497 void CPUKernelWorkGroup::readImageImplI(T *result, Image2D *image, int x, int y, 00498 int z, uint32_t sampler) const 00499 { 00500 // Handle the addressing mode of the sampler 00501 if (handle_address_mode(image, x, y, z, sampler)) 00502 { 00503 // Border color 00504 result[0] = 0.0f; 00505 result[1] = 0.0f; 00506 result[2] = 0.0f; 00507 00508 switch (image->format().image_channel_order) 00509 { 00510 case CL_R: 00511 case CL_RG: 00512 case CL_RGB: 00513 case CL_LUMINANCE: 00514 result[3] = 1.0f; 00515 break; 00516 default: 00517 result[3] = 0.0f; 00518 } 00519 00520 return; 00521 } 00522 00523 // Load the data from the image, converting it 00524 void *source = getImageData(image, x, y, z); 00525 T converted[4]; 00526 00527 convert_from_format(converted, 00528 source, 00529 image->format().image_channel_data_type, 00530 image->channels()); 00531 00532 // Swizzle the pixel just read and place it in result 00533 swizzle((uint32_t *)result, (uint32_t *)converted, 00534 image->format().image_channel_order, true, type_max_value<T>()); 00535 } 00536 00537 void CPUKernelWorkGroup::readImage(float *result, Image2D *image, int x, int y, 00538 int z, uint32_t sampler) const 00539 { 00540 readImageImplI<float>(result, image, x, y, z, sampler); 00541 } 00542 00543 void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, int x, int y, 00544 int z, uint32_t sampler) const 00545 { 00546 readImageImplI<int32_t>(result, image, x, y, z, sampler); 00547 } 00548 00549 void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, int x, int y, 00550 int z, uint32_t sampler) const 00551 { 00552 readImageImplI<uint32_t>(result, image, x, y, z, sampler); 00553 } 00554 00555 template<typename T> 00556 void CPUKernelWorkGroup::readImageImplF(T *result, Image2D *image, float x, 00557 float y, float z, uint32_t sampler) const 00558 { 00559 bool is_3d = (image->type() == MemObject::Image3D); 00560 Image3D *image3d = (Image3D *)image; 00561 00562 int w = image->width(), 00563 h = image->height(), 00564 d = (is_3d ? image3d->depth() : 1); 00565 00566 switch (sampler & 0xf0) 00567 { 00568 case CLK_ADDRESS_NONE: 00569 case CLK_ADDRESS_CLAMP: 00570 case CLK_ADDRESS_CLAMP_TO_EDGE: 00571 /* De-normalize coordinates */ 00572 if ((sampler & 0xf) == CLK_NORMALIZED_COORDS_TRUE) 00573 { 00574 x *= (float)w; 00575 y *= (float)h; 00576 if (is_3d) z *= (float)d; 00577 } 00578 00579 switch (sampler & 0xf00) 00580 { 00581 case CLK_FILTER_NEAREST: 00582 { 00583 readImageImplI<T>(result, image, std::floor(x), 00584 std::floor(y), std::floor(z), sampler); 00585 } 00586 case CLK_FILTER_LINEAR: 00587 { 00588 float a, b, c; 00589 00590 a = frac(x - 0.5f); 00591 b = frac(y - 0.5f); 00592 c = frac(z - 0.5f); 00593 00594 if (is_3d) 00595 { 00596 linear3D<T>(result, a, b, c, 00597 std::floor(x - 0.5f), 00598 std::floor(y - 0.5f), 00599 std::floor(z - 0.5f), 00600 std::floor(x - 0.5f) + 1, 00601 std::floor(y - 0.5f) + 1, 00602 std::floor(z - 0.5f) + 1, 00603 image3d); 00604 } 00605 else 00606 { 00607 linear2D<T>(result, a, b, c, 00608 std::floor(x - 0.5f), 00609 std::floor(y - 0.5f), 00610 std::floor(x - 0.5f) + 1, 00611 std::floor(y - 0.5f) + 1, 00612 image); 00613 } 00614 } 00615 } 00616 break; 00617 case CLK_ADDRESS_REPEAT: 00618 switch (sampler & 0xf00) 00619 { 00620 case CLK_FILTER_NEAREST: 00621 { 00622 int i, j, k; 00623 00624 x = (x - std::floor(x)) * (float)w; 00625 i = std::floor(x); 00626 if (i > w - 1) 00627 i = i - w; 00628 00629 y = (y - std::floor(y)) * (float)h; 00630 j = std::floor(y); 00631 if (j > h - 1) 00632 j = j - h; 00633 00634 if (is_3d) 00635 { 00636 z = (z - std::floor(z)) * (float)d; 00637 k = std::floor(z); 00638 if (k > d - 1) 00639 k = k - d; 00640 } 00641 00642 readImageImplI<T>(result, image, i, j, k, sampler); 00643 } 00644 case CLK_FILTER_LINEAR: 00645 { 00646 float a, b, c; 00647 int i0, i1, j0, j1, k0, k1; 00648 00649 x = (x - std::floor(x)) * (float)w; 00650 i0 = std::floor(x - 0.5f); 00651 i1 = i0 + 1; 00652 if (i0 < 0) 00653 i0 = w + i0; 00654 if (i1 > w - 1) 00655 i1 = i1 - w; 00656 00657 y = (y - std::floor(y)) * (float)h; 00658 j0 = std::floor(y - 0.5f); 00659 j1 = j0 + 1; 00660 if (j0 < 0) 00661 j0 = h + j0; 00662 if (j1 > h - 1) 00663 j1 = j1 - h; 00664 00665 if (is_3d) 00666 { 00667 z = (z - std::floor(z)) * (float)d; 00668 k0 = std::floor(z - 0.5f); 00669 k1 = k0 + 1; 00670 if (k0 < 0) 00671 k0 = d + k0; 00672 if (k1 > d - 1) 00673 k1 = k1 - d; 00674 } 00675 00676 a = frac(x - 0.5f); 00677 b = frac(y - 0.5f); 00678 c = frac(z - 0.5f); 00679 00680 if (is_3d) 00681 { 00682 linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1, 00683 image3d); 00684 } 00685 else 00686 { 00687 linear2D<T>(result, a, b, c, i0, j0, i1, j1, image); 00688 } 00689 } 00690 } 00691 break; 00692 case CLK_ADDRESS_MIRRORED_REPEAT: 00693 switch (sampler & 0xf00) 00694 { 00695 case CLK_FILTER_NEAREST: 00696 { 00697 x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w; 00698 y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h; 00699 if (is_3d) 00700 z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d; 00701 00702 readImageImplI<T>(result, image, 00703 min(std::floor(x), w - 1), 00704 min(std::floor(y), h - 1), 00705 min(std::floor(z), d - 1), 00706 sampler); 00707 } 00708 case CLK_FILTER_LINEAR: 00709 { 00710 float a, b, c; 00711 int i0, i1, j0, j1, k0, k1; 00712 00713 x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w; 00714 i0 = std::floor(x - 0.5f); 00715 i1 = i0 + 1; 00716 i0 = max(i0, 0); 00717 i1 = min(i1, w - 1); 00718 00719 y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h; 00720 j0 = std::floor(y - 0.5f); 00721 j1 = j0 + 1; 00722 j0 = max(j0, 0); 00723 j1 = min(j1, h - 1); 00724 00725 if (is_3d) 00726 { 00727 z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d; 00728 k0 = std::floor(z - 0.5f); 00729 k1 = k0 + 1; 00730 k0 = max(k0, 0); 00731 k1 = min(k1, d - 1); 00732 } 00733 00734 a = frac(x - 0.5f); 00735 b = frac(y - 0.5f); 00736 c = frac(z - 0.5f); 00737 00738 if (is_3d) 00739 { 00740 linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1, 00741 image3d); 00742 } 00743 else 00744 { 00745 linear2D<T>(result, a, b, c, i0, j0, i1, j1, image); 00746 } 00747 } 00748 } 00749 break; 00750 } 00751 } 00752 00753 void CPUKernelWorkGroup::readImage(float *result, Image2D *image, float x, 00754 float y, float z, uint32_t sampler) const 00755 { 00756 readImageImplF<float>(result, image, x, y, z, sampler); 00757 } 00758 00759 void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, float x, 00760 float y, float z, uint32_t sampler) const 00761 { 00762 readImageImplF<int32_t>(result, image, x, y, z, sampler); 00763 } 00764 00765 void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, float x, 00766 float y, float z, uint32_t sampler) const 00767 { 00768 readImageImplF<uint32_t>(result, image, x, y, z, sampler); 00769 }