Clover Git
OpenCL 1.1 software implementation
|
00001 /* 00002 * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr> 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions are met: 00007 * * Redistributions of source code must retain the above copyright 00008 * notice, this list of conditions and the following disclaimer. 00009 * * Redistributions in binary form must reproduce the above copyright 00010 * notice, this list of conditions and the following disclaimer in the 00011 * documentation and/or other materials provided with the distribution. 00012 * * Neither the name of the copyright holder nor the 00013 * names of its contributors may be used to endorse or promote products 00014 * derived from this software without specific prior written permission. 00015 * 00016 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 00017 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00018 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00019 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 00020 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00021 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00022 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00023 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00024 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00025 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00026 */ 00027 00039 #include "builtins.h" 00040 #include "kernel.h" 00041 #include "buffer.h" 00042 00043 #include "../events.h" 00044 #include "../memobject.h" 00045 00046 #include <sys/mman.h> 00047 #include <signal.h> 00048 00049 #include <llvm/Function.h> 00050 #include <iostream> 00051 #include <cstring> 00052 00053 #include <stdio.h> 00054 00055 using namespace Coal; 00056 00057 unsigned char *imageData(unsigned char *base, size_t x, size_t y, size_t z, 00058 size_t row_pitch, size_t slice_pitch, 00059 unsigned int bytes_per_pixel) 00060 { 00061 unsigned char *result = base; 00062 00063 result += (z * slice_pitch) + 00064 (y * row_pitch) + 00065 (x * bytes_per_pixel); 00066 00067 return result; 00068 } 00069 00070 /* 00071 * TLS-related functions 00072 */ 00073 __thread Coal::CPUKernelWorkGroup *g_work_group; 00074 __thread void *work_items_data; 00075 __thread size_t work_items_size; 00077 void setThreadLocalWorkGroup(Coal::CPUKernelWorkGroup *current) 00078 { 00079 g_work_group = current; 00080 } 00081 00082 void *getWorkItemsData(size_t &size) 00083 { 00084 size = work_items_size; 00085 return work_items_data; 00086 } 00087 00088 void setWorkItemsData(void *ptr, size_t size) 00089 { 00090 work_items_data = ptr; 00091 work_items_size = size; 00092 } 00093 00094 /* 00095 * Actual built-ins implementations 00096 */ 00097 cl_uint CPUKernelWorkGroup::getWorkDim() const 00098 { 00099 return p_work_dim; 00100 } 00101 00102 size_t CPUKernelWorkGroup::getGlobalId(cl_uint dimindx) const 00103 { 00104 if (dimindx > p_work_dim) 00105 return 0; 00106 00107 return p_global_id_start_offset[dimindx] + p_current_context->local_id[dimindx]; 00108 } 00109 00110 size_t CPUKernelWorkGroup::getGlobalSize(cl_uint dimindx) const 00111 { 00112 if (dimindx >p_work_dim) 00113 return 1; 00114 00115 return p_event->global_work_size(dimindx); 00116 } 00117 00118 size_t CPUKernelWorkGroup::getLocalSize(cl_uint dimindx) const 00119 { 00120 if (dimindx > p_work_dim) 00121 return 1; 00122 00123 return p_event->local_work_size(dimindx); 00124 } 00125 00126 size_t CPUKernelWorkGroup::getLocalID(cl_uint dimindx) const 00127 { 00128 if (dimindx > p_work_dim) 00129 return 0; 00130 00131 return p_current_context->local_id[dimindx]; 00132 } 00133 00134 size_t CPUKernelWorkGroup::getNumGroups(cl_uint dimindx) const 00135 { 00136 if (dimindx > p_work_dim) 00137 return 1; 00138 00139 return (p_event->global_work_size(dimindx) / 00140 p_event->local_work_size(dimindx)); 00141 } 00142 00143 size_t CPUKernelWorkGroup::getGroupID(cl_uint dimindx) const 00144 { 00145 if (dimindx > p_work_dim) 00146 return 0; 00147 00148 return p_index[dimindx]; 00149 } 00150 00151 size_t CPUKernelWorkGroup::getGlobalOffset(cl_uint dimindx) const 00152 { 00153 if (dimindx > p_work_dim) 00154 return 0; 00155 00156 return p_event->global_work_offset(dimindx); 00157 } 00158 00159 void CPUKernelWorkGroup::barrier(unsigned int flags) 00160 { 00161 p_had_barrier = true; 00162 00163 // Allocate or reuse TLS memory for the stacks (it isn't freed between 00164 // the work groups, and even the kernels, so if we need less space than 00165 // allocated, it's good) 00166 if (!p_contexts) 00167 { 00168 if (p_current_work_item != 0) 00169 { 00170 // Completely abnormal, it means that not every work-items 00171 // encounter the barrier 00172 std::cerr << "*** Not every work-items of " 00173 << p_kernel->function()->getNameStr() 00174 << " calls barrier(); !" << std::endl; 00175 return; 00176 } 00177 00178 // Allocate or reuse the stacks 00179 size_t contexts_size; 00180 p_contexts = getWorkItemsData(contexts_size); 00181 size_t needed_size = p_num_work_items * (p_stack_size + sizeof(Context)); 00182 00183 if (!p_contexts || contexts_size < needed_size) 00184 { 00185 // We must allocate a new space 00186 if (p_contexts) 00187 munmap(p_contexts, contexts_size); 00188 00189 p_contexts = mmap(0, needed_size, PROT_EXEC | PROT_READ | PROT_WRITE, /* People say a stack must be executable */ 00190 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); 00191 00192 setWorkItemsData(p_contexts, contexts_size); 00193 } 00194 00195 // Now that we have a real main context, initialize it 00196 p_current_context = getContextAddr(0); 00197 p_current_context->initialized = 1; 00198 std::memset(p_current_context->local_id, 0, p_work_dim * sizeof(size_t)); 00199 00200 getcontext(&p_current_context->context); 00201 } 00202 00203 // Take the next context 00204 p_current_work_item++; 00205 if (p_current_work_item == p_num_work_items) p_current_work_item = 0; 00206 00207 Context *next = getContextAddr(p_current_work_item); 00208 Context *main = getContextAddr(0); // The context not created with makecontext 00209 00210 // If the next context isn't initialized, initialize it. 00211 // Note: mmap zeroes the memory, so next->initialized == 0 if it isn't initialized 00212 if (next->initialized == 0) 00213 { 00214 next->initialized = 1; 00215 00216 // local-id of next is the one of the current context, but incVec'ed 00217 std::memcpy(next->local_id, p_current_context->local_id, 00218 MAX_WORK_DIMS * sizeof(size_t)); 00219 00220 incVec(p_work_dim, next->local_id, p_max_local_id); 00221 00222 // Initialize the next context 00223 if (getcontext(&next->context) != 0) 00224 return; 00225 00226 // Get its stack. It is located a next + sizeof(Context) 00227 char *stack = (char *)next; 00228 stack += sizeof(Context); 00229 00230 next->context.uc_link = &main->context; 00231 next->context.uc_stack.ss_sp = stack; 00232 next->context.uc_stack.ss_size = p_stack_size; 00233 00234 // Tell it to run the kernel function 00235 makecontext(&next->context, (void (*)())p_kernel_func_addr, 1, p_args); 00236 } 00237 00238 // Switch to the next context 00239 ucontext_t *cur = &p_current_context->context; 00240 p_current_context = next; 00241 00242 swapcontext(cur, &next->context); 00243 00244 // When we return here, it means that all the other work items encountered 00245 // a barrier and that we returned to this one. We can continue. 00246 } 00247 00248 void CPUKernelWorkGroup::builtinNotFound(const std::string &name) const 00249 { 00250 std::cout << "OpenCL: Non-existant builtin function " << name 00251 << " found in kernel " << p_kernel->function()->getNameStr() 00252 << '.' << std::endl; 00253 } 00254 00255 /* 00256 * Built-in functions 00257 */ 00258 00259 static size_t get_global_id(cl_uint dimindx) 00260 { 00261 return g_work_group->getGlobalId(dimindx); 00262 } 00263 00264 static cl_uint get_work_dim() 00265 { 00266 return g_work_group->getWorkDim(); 00267 } 00268 00269 static size_t get_global_size(uint dimindx) 00270 { 00271 return g_work_group->getGlobalSize(dimindx); 00272 } 00273 00274 static size_t get_local_size(uint dimindx) 00275 { 00276 return g_work_group->getLocalSize(dimindx); 00277 } 00278 00279 static size_t get_local_id(uint dimindx) 00280 { 00281 return g_work_group->getLocalID(dimindx); 00282 } 00283 00284 static size_t get_num_groups(uint dimindx) 00285 { 00286 return g_work_group->getNumGroups(dimindx); 00287 } 00288 00289 static size_t get_group_id(uint dimindx) 00290 { 00291 return g_work_group->getGroupID(dimindx); 00292 } 00293 00294 static size_t get_global_offset(uint dimindx) 00295 { 00296 return g_work_group->getGlobalOffset(dimindx); 00297 } 00298 00299 static void barrier(unsigned int flags) 00300 { 00301 g_work_group->barrier(flags); 00302 } 00303 00304 // Images 00305 00306 static int get_image_width(Image2D *image) 00307 { 00308 return image->width(); 00309 } 00310 00311 static int get_image_height(Image2D *image) 00312 { 00313 return image->height(); 00314 } 00315 00316 static int get_image_depth(Image3D *image) 00317 { 00318 if (image->type() != MemObject::Image3D) 00319 return 1; 00320 00321 return image->depth(); 00322 } 00323 00324 static int get_image_channel_data_type(Image2D *image) 00325 { 00326 return image->format().image_channel_data_type; 00327 } 00328 00329 static int get_image_channel_order(Image2D *image) 00330 { 00331 return image->format().image_channel_order; 00332 } 00333 00334 static void *image_data(Image2D *image, int x, int y, int z, int *order, int *type) 00335 { 00336 *order = image->format().image_channel_order; 00337 *type = image->format().image_channel_data_type; 00338 00339 return g_work_group->getImageData(image, x, y, z); 00340 } 00341 00342 static bool is_image_3d(Image3D *image) 00343 { 00344 return (image->type() == MemObject::Image3D ? 1 : 0); 00345 } 00346 00347 static void write_imagef(Image2D *image, int x, int y, int z, float *color) 00348 { 00349 g_work_group->writeImage(image, x, y, z, color); 00350 } 00351 00352 static void write_imagei(Image2D *image, int x, int y, int z, int32_t *color) 00353 { 00354 g_work_group->writeImage(image, x, y, z, color); 00355 } 00356 00357 static void write_imageui(Image2D *image, int x, int y, int z, uint32_t *color) 00358 { 00359 g_work_group->writeImage(image, x, y, z, color); 00360 } 00361 00362 static void read_imagefi(float *result, Image2D *image, int x, int y, int z, 00363 int32_t sampler) 00364 { 00365 g_work_group->readImage(result, image, x, y, z, sampler); 00366 } 00367 00368 static void read_imageii(int32_t *result, Image2D *image, int x, int y, int z, 00369 int32_t sampler) 00370 { 00371 g_work_group->readImage(result, image, x, y, z, sampler); 00372 } 00373 00374 static void read_imageuii(uint32_t *result, Image2D *image, int x, int y, int z, 00375 int32_t sampler) 00376 { 00377 g_work_group->readImage(result, image, x, y, z, sampler); 00378 } 00379 00380 static void read_imageff(float *result, Image2D *image, float x, float y, 00381 float z, int32_t sampler) 00382 { 00383 g_work_group->readImage(result, image, x, y, z, sampler); 00384 } 00385 00386 static void read_imageif(int32_t *result, Image2D *image, float x, float y, 00387 float z, int32_t sampler) 00388 { 00389 g_work_group->readImage(result, image, x, y, z, sampler); 00390 } 00391 00392 static void read_imageuif(uint32_t *result, Image2D *image, float x, float y, 00393 float z, int32_t sampler) 00394 { 00395 g_work_group->readImage(result, image, x, y, z, sampler); 00396 } 00397 00398 /* 00399 * Bridge between LLVM and us 00400 */ 00401 static void unimplemented_stub() 00402 { 00403 } 00404 00405 void *getBuiltin(const std::string &name) 00406 { 00407 if (name == "get_global_id") 00408 return (void *)&get_global_id; 00409 else if (name == "get_work_dim") 00410 return (void *)&get_work_dim; 00411 else if (name == "get_global_size") 00412 return (void *)&get_global_size; 00413 else if (name == "get_local_size") 00414 return (void *)&get_local_size; 00415 else if (name == "get_local_id") 00416 return (void *)&get_local_id; 00417 else if (name == "get_num_groups") 00418 return (void *)&get_num_groups; 00419 else if (name == "get_group_id") 00420 return (void *)&get_group_id; 00421 else if (name == "get_global_offset") 00422 return (void *)&get_global_offset; 00423 else if (name == "barrier") 00424 return (void *)&barrier; 00425 00426 else if (name == "__cpu_get_image_width") 00427 return (void *)&get_image_width; 00428 else if (name == "__cpu_get_image_height") 00429 return (void *)&get_image_height; 00430 else if (name == "__cpu_get_image_depth") 00431 return (void *)&get_image_depth; 00432 else if (name == "__cpu_get_image_channel_data_type") 00433 return (void *)&get_image_channel_data_type; 00434 else if (name == "__cpu_get_image_channel_order") 00435 return (void *)&get_image_channel_order; 00436 else if (name == "__cpu_image_data") 00437 return (void *)&image_data; 00438 else if (name == "__cpu_is_image_3d") 00439 return (void *)&is_image_3d; 00440 else if (name == "__cpu_write_imagef") 00441 return (void *)&write_imagef; 00442 else if (name == "__cpu_write_imagei") 00443 return (void *)&write_imagei; 00444 else if (name == "__cpu_write_imageui") 00445 return (void *)&write_imageui; 00446 else if (name == "__cpu_read_imagefi") 00447 return (void *)&read_imagefi; 00448 else if (name == "__cpu_read_imageii") 00449 return (void *)&read_imageii; 00450 else if (name == "__cpu_read_imageuii") 00451 return (void *)&read_imageuii; 00452 else if (name == "__cpu_read_imageff") 00453 return (void *)&read_imageff; 00454 else if (name == "__cpu_read_imageif") 00455 return (void *)&read_imageif; 00456 else if (name == "__cpu_read_imageuif") 00457 return (void *)&read_imageuif; 00458 00459 else if (name == "debug") 00460 return (void *)&printf; 00461 00462 // Function not found 00463 g_work_group->builtinNotFound(name); 00464 00465 return (void *)&unimplemented_stub; 00466 }