Clover Git
OpenCL 1.1 software implementation

builtins.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions are met:
00007  *     * Redistributions of source code must retain the above copyright
00008  *       notice, this list of conditions and the following disclaimer.
00009  *     * Redistributions in binary form must reproduce the above copyright
00010  *       notice, this list of conditions and the following disclaimer in the
00011  *       documentation and/or other materials provided with the distribution.
00012  *     * Neither the name of the copyright holder nor the
00013  *       names of its contributors may be used to endorse or promote products
00014  *       derived from this software without specific prior written permission.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00018  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00019  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
00020  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00021  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00022  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00023  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00024  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00025  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00026  */
00027 
00039 #include "builtins.h"
00040 #include "kernel.h"
00041 #include "buffer.h"
00042 
00043 #include "../events.h"
00044 #include "../memobject.h"
00045 
00046 #include <sys/mman.h>
00047 #include <signal.h>
00048 
00049 #include <llvm/Function.h>
00050 #include <iostream>
00051 #include <cstring>
00052 
00053 #include <stdio.h>
00054 
00055 using namespace Coal;
00056 
00057 unsigned char *imageData(unsigned char *base, size_t x, size_t y, size_t z,
00058                          size_t row_pitch, size_t slice_pitch,
00059                          unsigned int bytes_per_pixel)
00060 {
00061     unsigned char *result = base;
00062 
00063     result += (z * slice_pitch) +
00064               (y * row_pitch) +
00065               (x * bytes_per_pixel);
00066 
00067     return result;
00068 }
00069 
00070 /*
00071  * TLS-related functions
00072  */
00073 __thread Coal::CPUKernelWorkGroup *g_work_group;    
00074 __thread void *work_items_data;                     
00075 __thread size_t work_items_size;                    
00077 void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current)
00078 {
00079     g_work_group = current;
00080 }
00081 
00082 void *getWorkItemsData(size_t &size)
00083 {
00084     size = work_items_size;
00085     return work_items_data;
00086 }
00087 
00088 void setWorkItemsData(void *ptr, size_t size)
00089 {
00090     work_items_data = ptr;
00091     work_items_size = size;
00092 }
00093 
00094 /*
00095  * Actual built-ins implementations
00096  */
00097 cl_uint CPUKernelWorkGroup::getWorkDim() const
00098 {
00099     return p_work_dim;
00100 }
00101 
00102 size_t CPUKernelWorkGroup::getGlobalId(cl_uint dimindx) const
00103 {
00104     if (dimindx > p_work_dim)
00105         return 0;
00106 
00107     return p_global_id_start_offset[dimindx] + p_current_context->local_id[dimindx];
00108 }
00109 
00110 size_t CPUKernelWorkGroup::getGlobalSize(cl_uint dimindx) const
00111 {
00112     if (dimindx >p_work_dim)
00113         return 1;
00114 
00115     return p_event->global_work_size(dimindx);
00116 }
00117 
00118 size_t CPUKernelWorkGroup::getLocalSize(cl_uint dimindx) const
00119 {
00120     if (dimindx > p_work_dim)
00121         return 1;
00122 
00123     return p_event->local_work_size(dimindx);
00124 }
00125 
00126 size_t CPUKernelWorkGroup::getLocalID(cl_uint dimindx) const
00127 {
00128     if (dimindx > p_work_dim)
00129         return 0;
00130 
00131     return p_current_context->local_id[dimindx];
00132 }
00133 
00134 size_t CPUKernelWorkGroup::getNumGroups(cl_uint dimindx) const
00135 {
00136     if (dimindx > p_work_dim)
00137         return 1;
00138 
00139     return (p_event->global_work_size(dimindx) /
00140             p_event->local_work_size(dimindx));
00141 }
00142 
00143 size_t CPUKernelWorkGroup::getGroupID(cl_uint dimindx) const
00144 {
00145     if (dimindx > p_work_dim)
00146         return 0;
00147 
00148     return p_index[dimindx];
00149 }
00150 
00151 size_t CPUKernelWorkGroup::getGlobalOffset(cl_uint dimindx) const
00152 {
00153     if (dimindx > p_work_dim)
00154         return 0;
00155 
00156     return p_event->global_work_offset(dimindx);
00157 }
00158 
00159 void CPUKernelWorkGroup::barrier(unsigned int flags)
00160 {
00161     p_had_barrier = true;
00162 
00163     // Allocate or reuse TLS memory for the stacks (it isn't freed between
00164     // the work groups, and even the kernels, so if we need less space than
00165     // allocated, it's good)
00166     if (!p_contexts)
00167     {
00168         if (p_current_work_item != 0)
00169         {
00170             // Completely abnormal, it means that not every work-items
00171             // encounter the barrier
00172             std::cerr << "*** Not every work-items of "
00173                       << p_kernel->function()->getNameStr()
00174                       << " calls barrier(); !" << std::endl;
00175             return;
00176         }
00177 
00178         // Allocate or reuse the stacks
00179         size_t contexts_size;
00180         p_contexts = getWorkItemsData(contexts_size);
00181         size_t needed_size = p_num_work_items * (p_stack_size + sizeof(Context));
00182 
00183         if (!p_contexts || contexts_size < needed_size)
00184         {
00185             // We must allocate a new space
00186             if (p_contexts)
00187                 munmap(p_contexts, contexts_size);
00188 
00189             p_contexts = mmap(0, needed_size, PROT_EXEC | PROT_READ | PROT_WRITE, /* People say a stack must be executable */
00190                             MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
00191 
00192             setWorkItemsData(p_contexts, contexts_size);
00193         }
00194 
00195         // Now that we have a real main context, initialize it
00196         p_current_context = getContextAddr(0);
00197         p_current_context->initialized = 1;
00198         std::memset(p_current_context->local_id, 0, p_work_dim * sizeof(size_t));
00199 
00200         getcontext(&p_current_context->context);
00201     }
00202 
00203     // Take the next context
00204     p_current_work_item++;
00205     if (p_current_work_item == p_num_work_items) p_current_work_item = 0;
00206 
00207     Context *next = getContextAddr(p_current_work_item);
00208     Context *main = getContextAddr(0);  // The context not created with makecontext
00209 
00210     // If the next context isn't initialized, initialize it.
00211     // Note: mmap zeroes the memory, so next->initialized == 0 if it isn't initialized
00212     if (next->initialized == 0)
00213     {
00214         next->initialized = 1;
00215 
00216         // local-id of next is the one of the current context, but incVec'ed
00217         std::memcpy(next->local_id, p_current_context->local_id,
00218                     MAX_WORK_DIMS * sizeof(size_t));
00219 
00220         incVec(p_work_dim, next->local_id, p_max_local_id);
00221 
00222         // Initialize the next context
00223         if (getcontext(&next->context) != 0)
00224             return;
00225 
00226         // Get its stack. It is located a next + sizeof(Context)
00227         char *stack = (char *)next;
00228         stack += sizeof(Context);
00229 
00230         next->context.uc_link = &main->context;
00231         next->context.uc_stack.ss_sp = stack;
00232         next->context.uc_stack.ss_size = p_stack_size;
00233 
00234         // Tell it to run the kernel function
00235         makecontext(&next->context, (void (*)())p_kernel_func_addr, 1, p_args);
00236     }
00237 
00238     // Switch to the next context
00239     ucontext_t *cur = &p_current_context->context;
00240     p_current_context = next;
00241 
00242     swapcontext(cur, &next->context);
00243 
00244     // When we return here, it means that all the other work items encountered
00245     // a barrier and that we returned to this one. We can continue.
00246 }
00247 
00248 void CPUKernelWorkGroup::builtinNotFound(const std::string &name) const
00249 {
00250     std::cout << "OpenCL: Non-existant builtin function " << name
00251               << " found in kernel " << p_kernel->function()->getNameStr()
00252               << '.' << std::endl;
00253 }
00254 
00255 /*
00256  * Built-in functions
00257  */
00258 
00259 static size_t get_global_id(cl_uint dimindx)
00260 {
00261     return g_work_group->getGlobalId(dimindx);
00262 }
00263 
00264 static cl_uint get_work_dim()
00265 {
00266     return g_work_group->getWorkDim();
00267 }
00268 
00269 static size_t get_global_size(uint dimindx)
00270 {
00271     return g_work_group->getGlobalSize(dimindx);
00272 }
00273 
00274 static size_t get_local_size(uint dimindx)
00275 {
00276     return g_work_group->getLocalSize(dimindx);
00277 }
00278 
00279 static size_t get_local_id(uint dimindx)
00280 {
00281     return g_work_group->getLocalID(dimindx);
00282 }
00283 
00284 static size_t get_num_groups(uint dimindx)
00285 {
00286     return g_work_group->getNumGroups(dimindx);
00287 }
00288 
00289 static size_t get_group_id(uint dimindx)
00290 {
00291     return g_work_group->getGroupID(dimindx);
00292 }
00293 
00294 static size_t get_global_offset(uint dimindx)
00295 {
00296     return g_work_group->getGlobalOffset(dimindx);
00297 }
00298 
00299 static void barrier(unsigned int flags)
00300 {
00301     g_work_group->barrier(flags);
00302 }
00303 
00304 // Images
00305 
00306 static int get_image_width(Image2D *image)
00307 {
00308     return image->width();
00309 }
00310 
00311 static int get_image_height(Image2D *image)
00312 {
00313     return image->height();
00314 }
00315 
00316 static int get_image_depth(Image3D *image)
00317 {
00318     if (image->type() != MemObject::Image3D)
00319         return 1;
00320 
00321     return image->depth();
00322 }
00323 
00324 static int get_image_channel_data_type(Image2D *image)
00325 {
00326     return image->format().image_channel_data_type;
00327 }
00328 
00329 static int get_image_channel_order(Image2D *image)
00330 {
00331     return image->format().image_channel_order;
00332 }
00333 
00334 static void *image_data(Image2D *image, int x, int y, int z, int *order, int *type)
00335 {
00336     *order = image->format().image_channel_order;
00337     *type = image->format().image_channel_data_type;
00338 
00339     return g_work_group->getImageData(image, x, y, z);
00340 }
00341 
00342 static bool is_image_3d(Image3D *image)
00343 {
00344     return (image->type() == MemObject::Image3D ? 1 : 0);
00345 }
00346 
00347 static void write_imagef(Image2D *image, int x, int y, int z, float *color)
00348 {
00349     g_work_group->writeImage(image, x, y, z, color);
00350 }
00351 
00352 static void write_imagei(Image2D *image, int x, int y, int z, int32_t *color)
00353 {
00354     g_work_group->writeImage(image, x, y, z, color);
00355 }
00356 
00357 static void write_imageui(Image2D *image, int x, int y, int z, uint32_t *color)
00358 {
00359     g_work_group->writeImage(image, x, y, z, color);
00360 }
00361 
00362 static void read_imagefi(float *result, Image2D *image, int x, int y, int z,
00363                          int32_t sampler)
00364 {
00365     g_work_group->readImage(result, image, x, y, z, sampler);
00366 }
00367 
00368 static void read_imageii(int32_t *result, Image2D *image, int x, int y, int z,
00369                          int32_t sampler)
00370 {
00371     g_work_group->readImage(result, image, x, y, z, sampler);
00372 }
00373 
00374 static void read_imageuii(uint32_t *result, Image2D *image, int x, int y, int z,
00375                          int32_t sampler)
00376 {
00377     g_work_group->readImage(result, image, x, y, z, sampler);
00378 }
00379 
00380 static void read_imageff(float *result, Image2D *image, float x, float y,
00381                          float z, int32_t sampler)
00382 {
00383     g_work_group->readImage(result, image, x, y, z, sampler);
00384 }
00385 
00386 static void read_imageif(int32_t *result, Image2D *image, float x, float y,
00387                          float z, int32_t sampler)
00388 {
00389     g_work_group->readImage(result, image, x, y, z, sampler);
00390 }
00391 
00392 static void read_imageuif(uint32_t *result, Image2D *image, float x, float y,
00393                           float z, int32_t sampler)
00394 {
00395     g_work_group->readImage(result, image, x, y, z, sampler);
00396 }
00397 
00398 /*
00399  * Bridge between LLVM and us
00400  */
00401 static void unimplemented_stub()
00402 {
00403 }
00404 
00405 void *getBuiltin(const std::string &name)
00406 {
00407     if (name == "get_global_id")
00408         return (void *)&get_global_id;
00409     else if (name == "get_work_dim")
00410         return (void *)&get_work_dim;
00411     else if (name == "get_global_size")
00412         return (void *)&get_global_size;
00413     else if (name == "get_local_size")
00414         return (void *)&get_local_size;
00415     else if (name == "get_local_id")
00416         return (void *)&get_local_id;
00417     else if (name == "get_num_groups")
00418         return (void *)&get_num_groups;
00419     else if (name == "get_group_id")
00420         return (void *)&get_group_id;
00421     else if (name == "get_global_offset")
00422         return (void *)&get_global_offset;
00423     else if (name == "barrier")
00424         return (void *)&barrier;
00425 
00426     else if (name == "__cpu_get_image_width")
00427         return (void *)&get_image_width;
00428     else if (name == "__cpu_get_image_height")
00429         return (void *)&get_image_height;
00430     else if (name == "__cpu_get_image_depth")
00431         return (void *)&get_image_depth;
00432     else if (name == "__cpu_get_image_channel_data_type")
00433         return (void *)&get_image_channel_data_type;
00434     else if (name == "__cpu_get_image_channel_order")
00435         return (void *)&get_image_channel_order;
00436     else if (name == "__cpu_image_data")
00437         return (void *)&image_data;
00438     else if (name == "__cpu_is_image_3d")
00439         return (void *)&is_image_3d;
00440     else if (name == "__cpu_write_imagef")
00441         return (void *)&write_imagef;
00442     else if (name == "__cpu_write_imagei")
00443         return (void *)&write_imagei;
00444     else if (name == "__cpu_write_imageui")
00445         return (void *)&write_imageui;
00446     else if (name == "__cpu_read_imagefi")
00447         return (void *)&read_imagefi;
00448     else if (name == "__cpu_read_imageii")
00449         return (void *)&read_imageii;
00450     else if (name == "__cpu_read_imageuii")
00451         return (void *)&read_imageuii;
00452     else if (name == "__cpu_read_imageff")
00453         return (void *)&read_imageff;
00454     else if (name == "__cpu_read_imageif")
00455         return (void *)&read_imageif;
00456     else if (name == "__cpu_read_imageuif")
00457         return (void *)&read_imageuif;
00458 
00459     else if (name == "debug")
00460         return (void *)&printf;
00461 
00462     // Function not found
00463     g_work_group->builtinNotFound(name);
00464 
00465     return (void *)&unimplemented_stub;
00466 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines