Clover Git
OpenCL 1.1 software implementation

sampler.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions are met:
00007  *     * Redistributions of source code must retain the above copyright
00008  *       notice, this list of conditions and the following disclaimer.
00009  *     * Redistributions in binary form must reproduce the above copyright
00010  *       notice, this list of conditions and the following disclaimer in the
00011  *       documentation and/or other materials provided with the distribution.
00012  *     * Neither the name of the copyright holder nor the
00013  *       names of its contributors may be used to endorse or promote products
00014  *       derived from this software without specific prior written permission.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00018  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00019  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
00020  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00021  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00022  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00023  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00024  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00025  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00026  */
00027 
00037 #include "../memobject.h"
00038 #include "../sampler.h"
00039 #include "kernel.h"
00040 #include "buffer.h"
00041 #include "builtins.h"
00042 
00043 #include <cstdlib>
00044 #include <cmath>
00045 #include <immintrin.h>
00046 
00047 using namespace Coal;
00048 
00049 /*
00050  * Helper functions
00051  */
00052 
00053 static int clamp(int a, int b, int c)
00054 {
00055     return (a < b) ? b : ((a > c) ? c : a);
00056 }
00057 
00058 static int min(int a, int b)
00059 {
00060     return (a < b ? a : b);
00061 }
00062 
00063 static int max(int a, int b)
00064 {
00065     return (a > b ? a : b);
00066 }
00067 
00068 static float frac(float x)
00069 {
00070     return x - std::floor(x);
00071 }
00072 
00073 static float round(float x)
00074 {
00075     return (float)(int)x;
00076 }
00077 
00078 static bool handle_address_mode(Image2D *image, int &x, int &y, int &z,
00079                                 uint32_t sampler)
00080 {
00081     bool is_3d = (image->type() == MemObject::Image3D);
00082     int w = image->width(),
00083         h = image->height(),
00084         d = (is_3d ? ((Image3D *)image)->depth() : 1);
00085 
00086     if ((sampler & 0xf0) ==  CLK_ADDRESS_CLAMP_TO_EDGE)
00087     {
00088         x = clamp(x, 0, w - 1);
00089         y = clamp(y, 0, h - 1);
00090         if (is_3d) z = clamp(z, 0, d - 1);
00091     }
00092     else if ((sampler & 0xf0) == CLK_ADDRESS_CLAMP)
00093     {
00094         x = clamp(x, 0, w);
00095         y = clamp(y, 0, h);
00096         if (is_3d) z = clamp(z, 0, d);
00097     }
00098 
00099     return (x == w || y == h || z == d);
00100 }
00101 
00102 /*
00103  * Macros or functions used to accelerate the functions
00104  */
00105 #ifndef __has_builtin
00106     #define __has_builtin(x) 0
00107 #endif
00108 
00109 static void slow_shuffle4(uint32_t *rs, uint32_t *a, uint32_t *b,
00110                           int x, int y, int z, int w)
00111 {
00112     rs[0] = (x < 4 ? a[x] : b[x - 4]);
00113     rs[1] = (y < 4 ? a[y] : b[y - 4]);
00114     rs[2] = (z < 4 ? a[z] : b[z - 4]);
00115     rs[3] = (w < 4 ? a[w] : b[w - 4]);
00116 }
00117 
00118 static void convert_to_format(void *dest, float *data,
00119                                    cl_channel_type type, unsigned int channels)
00120 {
00121     // Convert always the four components of source to target
00122     if (type == CL_FLOAT)
00123         std::memcpy(dest, data, channels * sizeof(float));
00124 
00125     for (unsigned int i=0; i<channels; ++i)
00126     {
00127         switch (type)
00128         {
00129             case CL_SNORM_INT8:
00130                 ((int8_t *)dest)[i] = data[i] * 128.0f;
00131                 break;
00132             case CL_SNORM_INT16:
00133                 ((int16_t *)dest)[i] = data[i] * 32767.0f;
00134                 break;
00135             case CL_UNORM_INT8:
00136                 ((uint8_t *)dest)[i] = data[i] * 255.0f;
00137                 break;
00138             case CL_UNORM_INT16:
00139                 ((uint16_t *)dest)[i] = data[i] * 65535.0f;
00140                 break;
00141         }
00142     }
00143 }
00144 
00145 static void convert_from_format(float *data, void *source,
00146                                      cl_channel_type type, unsigned int channels)
00147 {
00148     // Convert always the four components of source to target
00149     if (type == CL_FLOAT)
00150         std::memcpy(data, source, channels * sizeof(float));
00151 
00152     for (unsigned int i=0; i<channels; ++i)
00153     {
00154         switch (type)
00155         {
00156             case CL_SNORM_INT8:
00157                 data[i] = (float)((int8_t *)source)[i] / 127.0f;
00158                 break;
00159             case CL_SNORM_INT16:
00160                 data[i] = (float)((int16_t *)source)[i] / 32767.0f;
00161                 break;
00162             case CL_UNORM_INT8:
00163                 data[i] = (float)((uint8_t *)source)[i] / 127.0f;
00164                 break;
00165             case CL_UNORM_INT16:
00166                 data[i] = (float)((uint16_t *)source)[i] / 127.0f;
00167                 break;
00168         }
00169     }
00170 }
00171 
00172 static void convert_to_format(void *dest, int *data,
00173                                    cl_channel_type type, unsigned int channels)
00174 {
00175     // Convert always the four components of source to target
00176     if (type == CL_SIGNED_INT32)
00177         std::memcpy(dest, data, channels * sizeof(int32_t));
00178 
00179     for (unsigned int i=0; i<channels; ++i)
00180     {
00181         switch (type)
00182         {
00183             case CL_SIGNED_INT8:
00184                 ((int8_t *)dest)[i] = data[i];
00185                 break;
00186             case CL_SIGNED_INT16:
00187                 ((int16_t *)dest)[i] = data[i];
00188                 break;
00189         }
00190     }
00191 }
00192 
00193 static void convert_from_format(int32_t *data, void *source,
00194                                      cl_channel_type type, unsigned int channels)
00195 {
00196     // Convert always the four components of source to target
00197     if (type == CL_SIGNED_INT32)
00198         std::memcpy(data, source, channels * sizeof(int32_t));
00199 
00200     for (unsigned int i=0; i<channels; ++i)
00201     {
00202         switch (type)
00203         {
00204             case CL_SIGNED_INT8:
00205                 data[i] = ((int8_t *)source)[i];
00206                 break;
00207             case CL_SIGNED_INT16:
00208                 data[i] = ((int16_t *)source)[i];
00209                 break;
00210         }
00211     }
00212 }
00213 
00214 static void convert_to_format(void *dest, uint32_t *data,
00215                                    cl_channel_type type, unsigned int channels)
00216 {
00217     // Convert always the four components of source to target
00218     if (type == CL_UNSIGNED_INT32)
00219         std::memcpy(dest, data, channels * sizeof(uint32_t));
00220 
00221     for (unsigned int i=0; i<3; ++i)
00222     {
00223         switch (type)
00224         {
00225             case CL_UNSIGNED_INT8:
00226                 ((uint8_t *)dest)[i] = data[i];
00227                 break;
00228             case CL_UNSIGNED_INT16:
00229                 ((uint16_t *)dest)[i] = data[i];
00230                 break;
00231         }
00232     }
00233 }
00234 
00235 static void convert_from_format(uint32_t *data, void *source,
00236                                      cl_channel_type type, unsigned int channels)
00237 {
00238     // Convert always the four components of source to target
00239     if (type == CL_UNSIGNED_INT32)
00240         std::memcpy(data, source, channels * sizeof(uint32_t));
00241 
00242     for (unsigned int i=0; i<channels; ++i)
00243     {
00244         switch (type)
00245         {
00246             case CL_UNSIGNED_INT8:
00247                 data[i] = ((uint8_t *)source)[i];
00248                 break;
00249             case CL_UNSIGNED_INT16:
00250                 data[i] = ((uint16_t *)source)[i];
00251                 break;
00252         }
00253     }
00254 }
00255 
00256 template<typename T>
00257 static void vec4_scalar_mul(T *vec, float val)
00258 {
00259     for (unsigned int i=0; i<4; ++i)
00260         vec[i] *= val;
00261 }
00262 
00263 template<typename T>
00264 static void vec4_add(T *vec1, T *vec2)
00265 {
00266     for (unsigned int i=0; i<4; ++i)
00267         vec1[i] += vec2[i];
00268 }
00269 
00270 template<typename T>
00271 void CPUKernelWorkGroup::linear3D(T *result, float a, float b, float c,
00272               int i0, int j0, int k0, int i1, int j1, int k1,
00273               Image3D *image) const
00274 {
00275     T accum[4];
00276 
00277     readImageImplI<T>(result, image, i0, j0, k0, 0);
00278     vec4_scalar_mul(result, (1.0f - a) * (1.0f - b) * (1.0f - c ));
00279 
00280     readImageImplI<T>(accum, image, i1, j0, k0, 0);
00281     vec4_scalar_mul(accum, a * (1.0f - b) * (1.0f - c ));
00282     vec4_add(result, accum);
00283 
00284     readImageImplI<T>(accum, image, i0, j1, k0, 0);
00285     vec4_scalar_mul(accum, (1.0f - a) * b * (1.0f - c ));
00286     vec4_add(result, accum);
00287 
00288     readImageImplI<T>(accum, image, i1, j1, k0, 0);
00289     vec4_scalar_mul(accum, a * b * (1.0f -c ));
00290     vec4_add(result, accum);
00291 
00292     readImageImplI<T>(accum, image, i0, j0, k1, 0);
00293     vec4_scalar_mul(accum, (1.0f - a) * (1.0f - b) * c);
00294     vec4_add(result, accum);
00295 
00296     readImageImplI<T>(accum, image, i1, j0, k1, 0);
00297     vec4_scalar_mul(accum, a * (1.0f - b) * c);
00298     vec4_add(result, accum);
00299 
00300     readImageImplI<T>(accum, image, i0, j1, k1, 0);
00301     vec4_scalar_mul(accum, (1.0f - a) * b * c);
00302     vec4_add(result, accum);
00303 
00304     readImageImplI<T>(accum, image, i1, j1, k1, 0);
00305     vec4_scalar_mul(accum, a * b * c);
00306     vec4_add(result, accum);
00307 }
00308 
00309 template<typename T>
00310 void CPUKernelWorkGroup::linear2D(T *result, float a, float b, float c, int i0, int j0,
00311               int i1, int j1, Image2D *image) const
00312 {
00313     T accum[4];
00314 
00315     readImageImplI<T>(result, image, i0, j0, 0, 0);
00316     vec4_scalar_mul(result, (1.0f - a) * (1.0f - b));
00317 
00318     readImageImplI<T>(accum, image, i1, j0, 0, 0);
00319     vec4_scalar_mul(accum, a * (1.0f - b));
00320     vec4_add(result, accum);
00321 
00322     readImageImplI<T>(accum, image, i0, j1, 0, 0);
00323     vec4_scalar_mul(accum, (1.0f - a) * b);
00324     vec4_add(result, accum);
00325 
00326     readImageImplI<T>(accum, image, i1, j1, 0, 0);
00327     vec4_scalar_mul(accum, a * b);
00328     vec4_add(result, accum);
00329 }
00330 
00331 #if __has_builtin(__builtin_shufflevector)
00332     #define shuffle4(rs, a, b, x, y, z, w) \
00333         *(__v4sf *)rs = __builtin_shufflevector(*(__v4sf *)a, *(__v4sf *)b, \
00334                                                 x, y, z, w)
00335 #else
00336     #define shuffle4(rs, a, b, x, y, z, w) \
00337         slow_shuffle4(rs, a, b, x, y, z, w)
00338 #endif
00339 
00340 static void swizzle(uint32_t *target, uint32_t *source,
00341                     cl_channel_order order, bool reading, uint32_t t_max)
00342 {
00343     uint32_t special[4] = {0, t_max, 0, 0 };
00344 
00345     if (reading)
00346     {
00347         switch (order)
00348         {
00349             case CL_R:
00350             case CL_Rx:
00351                 // target = {source->x, 0, 0, t_max}
00352                 shuffle4(target, source, special, 0, 4, 4, 5);
00353                 break;
00354             case CL_A:
00355                 // target = {0, 0, 0, source->x}
00356                 shuffle4(target, source, special, 4, 4, 4, 0);
00357                 break;
00358             case CL_INTENSITY:
00359                 // target = {source->x, source->x, source->x, source->x}
00360                 shuffle4(target, source, source, 0, 0, 0, 0);
00361                 break;
00362             case CL_LUMINANCE:
00363                 // target = {source->x, source->x, source->x, t_max}
00364                 shuffle4(target, source, special, 0, 0, 0, 5);
00365                 break;
00366             case CL_RG:
00367             case CL_RGx:
00368                 // target = {source->x, source->y, 0, t_max}
00369                 shuffle4(target, source, special, 0, 1, 4, 5);
00370                 break;
00371             case CL_RA:
00372                 // target = {source->x, 0, 0, source->y}
00373                 shuffle4(target, source, special, 0, 4, 4, 1);
00374                 break;
00375             case CL_RGB:
00376             case CL_RGBx:
00377             case CL_RGBA:
00378                 // Nothing to do, already the good order
00379                 std::memcpy(target, source, 16);
00380                 break;
00381             case CL_ARGB:
00382                 // target = {source->y, source->z, source->w, source->x}
00383                 shuffle4(target, source, source, 1, 2, 3, 0);
00384                 break;
00385             case CL_BGRA:
00386                 // target = {source->z, source->y, source->x, source->w}
00387                 shuffle4(target, source, source, 2, 1, 0, 3);
00388                 break;
00389         }
00390     }
00391     else
00392     {
00393         switch (order)
00394         {
00395             case CL_A:
00396                 // target = {source->w, undef, undef, undef}
00397                 shuffle4(target, source, source, 3, 3, 3, 3);
00398                 break;
00399             case CL_RA:
00400                 // target = {source->x, source->w, undef, undef}
00401                 shuffle4(target, source, source, 0, 3, 3, 3);
00402                 break;
00403             case CL_ARGB:
00404                 // target = {source->w, source->x, source->y, source->z}
00405                 shuffle4(target, source, source, 3, 0, 1, 2);
00406                 break;
00407             case CL_BGRA:
00408                 // target = {source->z, source->y, source->x, source->w}
00409                 shuffle4(target, source, source, 2, 1, 0, 3);
00410                 break;
00411             default:
00412                 std::memcpy(target, source, 16);
00413         }
00414     }
00415 }
00416 
00417 /*
00418  * Actual implementation of the built-ins
00419  */
00420 
00421 void *CPUKernelWorkGroup::getImageData(Image2D *image, int x, int y, int z) const
00422 {
00423     CPUBuffer *buffer =
00424         (CPUBuffer *)image->deviceBuffer((DeviceInterface *)p_kernel->device());
00425 
00426     return imageData((unsigned char *)buffer->data(),
00427                      x, y, z,
00428                      image->row_pitch(),
00429                      image->slice_pitch(),
00430                      image->pixel_size());
00431 }
00432 
00433 template<typename T>
00434 void CPUKernelWorkGroup::writeImageImpl(Image2D *image, int x, int y, int z,
00435                                         T *color) const
00436 {
00437     T converted[4];
00438 
00439     // Swizzle to the correct order (float, int and uint are 32-bit, so the
00440     // type has no importance
00441     swizzle((uint32_t *)converted, (uint32_t *)color,
00442             image->format().image_channel_order, false, 0);
00443 
00444     // Get a pointer in the image where to write the data
00445     void *target = getImageData(image, x, y, z);
00446 
00447     // Convert color to the correct format
00448     convert_to_format(target,
00449                       converted,
00450                       image->format().image_channel_data_type,
00451                       image->channels());
00452 }
00453 
00454 void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
00455                                     float *color) const
00456 {
00457     writeImageImpl<float>(image, x, y, z, color);
00458 }
00459 
00460 void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
00461                                     int32_t *color) const
00462 {
00463     writeImageImpl<int32_t>(image, x, y, z, color);
00464 }
00465 
00466 void CPUKernelWorkGroup::writeImage(Image2D *image, int x, int y, int z,
00467                                     uint32_t *color) const
00468 {
00469     writeImageImpl<uint32_t>(image, x, y, z, color);
00470 }
00471 
00472 template<typename T>
00473 uint32_t type_max_value()
00474 {
00475     return 0;
00476 }
00477 
00478 template<>
00479 uint32_t type_max_value<float>()
00480 {
00481     return 1065353216; // 1.0f in decimal form
00482 }
00483 
00484 template<>
00485 uint32_t type_max_value<int32_t>()
00486 {
00487     return 0x7fffffff;
00488 }
00489 
00490 template<>
00491 uint32_t type_max_value<uint32_t>()
00492 {
00493     return 0xffffffff;
00494 }
00495 
00496 template<typename T>
00497 void CPUKernelWorkGroup::readImageImplI(T *result, Image2D *image, int x, int y,
00498                                         int z, uint32_t sampler) const
00499 {
00500     // Handle the addressing mode of the sampler
00501     if (handle_address_mode(image, x, y, z, sampler))
00502     {
00503         // Border color
00504         result[0] = 0.0f;
00505         result[1] = 0.0f;
00506         result[2] = 0.0f;
00507 
00508         switch (image->format().image_channel_order)
00509         {
00510             case CL_R:
00511             case CL_RG:
00512             case CL_RGB:
00513             case CL_LUMINANCE:
00514                 result[3] = 1.0f;
00515                 break;
00516             default:
00517                 result[3] = 0.0f;
00518         }
00519 
00520         return;
00521     }
00522 
00523     // Load the data from the image, converting it
00524     void *source = getImageData(image, x, y, z);
00525     T converted[4];
00526 
00527     convert_from_format(converted,
00528                         source,
00529                         image->format().image_channel_data_type,
00530                         image->channels());
00531 
00532     // Swizzle the pixel just read and place it in result
00533     swizzle((uint32_t *)result, (uint32_t *)converted,
00534             image->format().image_channel_order, true, type_max_value<T>());
00535 }
00536 
00537 void CPUKernelWorkGroup::readImage(float *result, Image2D *image, int x, int y,
00538                                    int z, uint32_t sampler) const
00539 {
00540     readImageImplI<float>(result, image, x, y, z, sampler);
00541 }
00542 
00543 void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, int x, int y,
00544                                    int z, uint32_t sampler) const
00545 {
00546     readImageImplI<int32_t>(result, image, x, y, z, sampler);
00547 }
00548 
00549 void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, int x, int y,
00550                                    int z, uint32_t sampler) const
00551 {
00552     readImageImplI<uint32_t>(result, image, x, y, z, sampler);
00553 }
00554 
00555 template<typename T>
00556 void CPUKernelWorkGroup::readImageImplF(T *result, Image2D *image, float x,
00557                                         float y, float z, uint32_t sampler) const
00558 {
00559     bool is_3d = (image->type() == MemObject::Image3D);
00560     Image3D *image3d = (Image3D *)image;
00561 
00562     int w = image->width(),
00563         h = image->height(),
00564         d = (is_3d ? image3d->depth() : 1);
00565 
00566     switch (sampler & 0xf0)
00567     {
00568         case CLK_ADDRESS_NONE:
00569         case CLK_ADDRESS_CLAMP:
00570         case CLK_ADDRESS_CLAMP_TO_EDGE:
00571             /* De-normalize coordinates */
00572             if ((sampler & 0xf) == CLK_NORMALIZED_COORDS_TRUE)
00573             {
00574                 x *= (float)w;
00575                 y *= (float)h;
00576                 if (is_3d) z *= (float)d;
00577             }
00578 
00579             switch (sampler & 0xf00)
00580             {
00581                 case CLK_FILTER_NEAREST:
00582                 {
00583                     readImageImplI<T>(result, image, std::floor(x),
00584                                       std::floor(y), std::floor(z), sampler);
00585                 }
00586                 case CLK_FILTER_LINEAR:
00587                 {
00588                     float a, b, c;
00589 
00590                     a = frac(x - 0.5f);
00591                     b = frac(y - 0.5f);
00592                     c = frac(z - 0.5f);
00593 
00594                     if (is_3d)
00595                     {
00596                         linear3D<T>(result, a, b, c,
00597                                     std::floor(x - 0.5f),
00598                                     std::floor(y - 0.5f),
00599                                     std::floor(z - 0.5f),
00600                                     std::floor(x - 0.5f) + 1,
00601                                     std::floor(y - 0.5f) + 1,
00602                                     std::floor(z - 0.5f) + 1,
00603                                     image3d);
00604                     }
00605                     else
00606                     {
00607                         linear2D<T>(result, a, b, c,
00608                                     std::floor(x - 0.5f),
00609                                     std::floor(y - 0.5f),
00610                                     std::floor(x - 0.5f) + 1,
00611                                     std::floor(y - 0.5f) + 1,
00612                                     image);
00613                     }
00614                 }
00615             }
00616             break;
00617         case CLK_ADDRESS_REPEAT:
00618             switch (sampler & 0xf00)
00619             {
00620                 case CLK_FILTER_NEAREST:
00621                 {
00622                     int i, j, k;
00623 
00624                     x = (x - std::floor(x)) * (float)w;
00625                     i = std::floor(x);
00626                     if (i > w - 1)
00627                         i = i - w;
00628 
00629                     y = (y - std::floor(y)) * (float)h;
00630                     j = std::floor(y);
00631                     if (j > h - 1)
00632                         j = j - h;
00633 
00634                     if (is_3d)
00635                     {
00636                         z = (z - std::floor(z)) * (float)d;
00637                         k = std::floor(z);
00638                         if (k > d - 1)
00639                             k = k - d;
00640                     }
00641 
00642                     readImageImplI<T>(result, image, i, j, k, sampler);
00643                 }
00644                 case CLK_FILTER_LINEAR:
00645                 {
00646                     float a, b, c;
00647                     int i0, i1, j0, j1, k0, k1;
00648 
00649                     x = (x - std::floor(x)) * (float)w;
00650                     i0 = std::floor(x - 0.5f);
00651                     i1 = i0 + 1;
00652                     if (i0 < 0)
00653                         i0 = w + i0;
00654                     if (i1 > w - 1)
00655                         i1 = i1 - w;
00656 
00657                     y = (y - std::floor(y)) * (float)h;
00658                     j0 = std::floor(y - 0.5f);
00659                     j1 = j0 + 1;
00660                     if (j0 < 0)
00661                         j0 = h + j0;
00662                     if (j1 > h - 1)
00663                         j1 = j1 - h;
00664 
00665                     if (is_3d)
00666                     {
00667                         z = (z - std::floor(z)) * (float)d;
00668                         k0 = std::floor(z - 0.5f);
00669                         k1 = k0 + 1;
00670                         if (k0 < 0)
00671                             k0 = d + k0;
00672                         if (k1 > d - 1)
00673                             k1 = k1 - d;
00674                     }
00675 
00676                     a = frac(x - 0.5f);
00677                     b = frac(y - 0.5f);
00678                     c = frac(z - 0.5f);
00679 
00680                     if (is_3d)
00681                     {
00682                         linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1,
00683                                     image3d);
00684                     }
00685                     else
00686                     {
00687                         linear2D<T>(result, a, b, c, i0, j0, i1, j1, image);
00688                     }
00689                 }
00690             }
00691             break;
00692         case CLK_ADDRESS_MIRRORED_REPEAT:
00693             switch (sampler & 0xf00)
00694             {
00695                 case CLK_FILTER_NEAREST:
00696                 {
00697                     x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w;
00698                     y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h;
00699                     if (is_3d)
00700                         z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d;
00701 
00702                     readImageImplI<T>(result, image,
00703                                       min(std::floor(x), w - 1),
00704                                       min(std::floor(y), h - 1),
00705                                       min(std::floor(z), d - 1),
00706                                       sampler);
00707                 }
00708                 case CLK_FILTER_LINEAR:
00709                 {
00710                     float a, b, c;
00711                     int i0, i1, j0, j1, k0, k1;
00712 
00713                     x = std::fabs(x - 2.0f * round(0.5f * x)) * (float)w;
00714                     i0 = std::floor(x - 0.5f);
00715                     i1 = i0 + 1;
00716                     i0 = max(i0, 0);
00717                     i1 = min(i1, w - 1);
00718 
00719                     y = std::fabs(y - 2.0f * round(0.5f * y)) * (float)h;
00720                     j0 = std::floor(y - 0.5f);
00721                     j1 = j0 + 1;
00722                     j0 = max(j0, 0);
00723                     j1 = min(j1, h - 1);
00724 
00725                     if (is_3d)
00726                     {
00727                         z = std::fabs(z - 2.0f * round(0.5f * z)) * (float)d;
00728                         k0 = std::floor(z - 0.5f);
00729                         k1 = k0 + 1;
00730                         k0 = max(k0, 0);
00731                         k1 = min(k1, d - 1);
00732                     }
00733 
00734                     a = frac(x - 0.5f);
00735                     b = frac(y - 0.5f);
00736                     c = frac(z - 0.5f);
00737 
00738                     if (is_3d)
00739                     {
00740                         linear3D<T>(result, a, b, c, i0, j0, k0, i1, j1, k1,
00741                                     image3d);
00742                     }
00743                     else
00744                     {
00745                         linear2D<T>(result, a, b, c, i0, j0, i1, j1, image);
00746                     }
00747                 }
00748             }
00749             break;
00750     }
00751 }
00752 
00753 void CPUKernelWorkGroup::readImage(float *result, Image2D *image, float x,
00754                                    float y, float z, uint32_t sampler) const
00755 {
00756     readImageImplF<float>(result, image, x, y, z, sampler);
00757 }
00758 
00759 void CPUKernelWorkGroup::readImage(int32_t *result, Image2D *image, float x,
00760                                    float y, float z, uint32_t sampler) const
00761 {
00762     readImageImplF<int32_t>(result, image, x, y, z, sampler);
00763 }
00764 
00765 void CPUKernelWorkGroup::readImage(uint32_t *result, Image2D *image, float x,
00766                                    float y, float z, uint32_t sampler) const
00767 {
00768     readImageImplF<uint32_t>(result, image, x, y, z, sampler);
00769 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines