Clover Git
OpenCL 1.1 software implementation
|
00001 /* 00002 * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr> 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions are met: 00007 * * Redistributions of source code must retain the above copyright 00008 * notice, this list of conditions and the following disclaimer. 00009 * * Redistributions in binary form must reproduce the above copyright 00010 * notice, this list of conditions and the following disclaimer in the 00011 * documentation and/or other materials provided with the distribution. 00012 * * Neither the name of the copyright holder nor the 00013 * names of its contributors may be used to endorse or promote products 00014 * derived from this software without specific prior written permission. 00015 * 00016 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 00017 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00018 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00019 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 00020 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00021 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00022 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00023 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00024 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00025 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00026 */ 00027 00033 #include "kernel.h" 00034 #include "propertylist.h" 00035 #include "program.h" 00036 #include "memobject.h" 00037 #include "sampler.h" 00038 #include "deviceinterface.h" 00039 00040 #include <string> 00041 #include <iostream> 00042 #include <cstring> 00043 #include <cstdlib> 00044 00045 #include <llvm/Support/Casting.h> 00046 #include <llvm/Module.h> 00047 #include <llvm/Type.h> 00048 #include <llvm/DerivedTypes.h> 00049 00050 using namespace Coal; 00051 Kernel::Kernel(Program *program) 00052 : Object(Object::T_Kernel, program), p_has_locals(false) 00053 { 00054 // TODO: Say a kernel is attached to the program (that becomes unalterable) 00055 00056 null_dep.device = 0; 00057 null_dep.kernel = 0; 00058 null_dep.function = 0; 00059 null_dep.module = 0; 00060 } 00061 00062 Kernel::~Kernel() 00063 { 00064 while (p_device_dependent.size()) 00065 { 00066 DeviceDependent &dep = p_device_dependent.back(); 00067 00068 delete dep.kernel; 00069 00070 p_device_dependent.pop_back(); 00071 } 00072 } 00073 00074 const Kernel::DeviceDependent &Kernel::deviceDependent(DeviceInterface *device) const 00075 { 00076 for (size_t i=0; i<p_device_dependent.size(); ++i) 00077 { 00078 const DeviceDependent &rs = p_device_dependent[i]; 00079 00080 if (rs.device == device || (!device && p_device_dependent.size() == 1)) 00081 return rs; 00082 } 00083 00084 return null_dep; 00085 } 00086 00087 Kernel::DeviceDependent &Kernel::deviceDependent(DeviceInterface *device) 00088 { 00089 for (size_t i=0; i<p_device_dependent.size(); ++i) 00090 { 00091 DeviceDependent &rs = p_device_dependent[i]; 00092 00093 if (rs.device == device || (!device && p_device_dependent.size() == 1)) 00094 return rs; 00095 } 00096 00097 return null_dep; 00098 } 00099 00100 cl_int Kernel::addFunction(DeviceInterface *device, llvm::Function *function, 00101 llvm::Module *module) 00102 { 00103 p_name = function->getNameStr(); 00104 00105 // Add a device dependent 00106 DeviceDependent dep; 00107 00108 dep.device = device; 00109 dep.function = function; 00110 dep.module = module; 00111 00112 // Build the arg list of the kernel (or verify it if a previous function 00113 // was already registered) 00114 llvm::FunctionType *f = function->getFunctionType(); 00115 bool append = (p_args.size() == 0); 00116 00117 if (!append && p_args.size() != f->getNumParams()) 00118 return CL_INVALID_KERNEL_DEFINITION; 00119 00120 for (unsigned int i=0; i<f->getNumParams(); ++i) 00121 { 00122 llvm::Type *arg_type = f->getParamType(i); 00123 Arg::Kind kind = Arg::Invalid; 00124 Arg::File file = Arg::Private; 00125 unsigned short vec_dim = 1; 00126 00127 if (arg_type->isPointerTy()) 00128 { 00129 // It's a pointer, dereference it 00130 llvm::PointerType *p_type = llvm::cast<llvm::PointerType>(arg_type); 00131 00132 file = (Arg::File)p_type->getAddressSpace(); 00133 arg_type = p_type->getElementType(); 00134 00135 // If it's a __local argument, we'll have to allocate memory at run time 00136 if (file == Arg::Local) 00137 p_has_locals = true; 00138 00139 kind = Arg::Buffer; 00140 00141 // If it's a struct, get its name 00142 if (arg_type->isStructTy()) 00143 { 00144 llvm::StructType *struct_type = 00145 llvm::cast<llvm::StructType>(arg_type); 00146 std::string struct_name = struct_type->getName().str(); 00147 00148 if (struct_name.compare(0, 14, "struct.image2d") == 0) 00149 { 00150 kind = Arg::Image2D; 00151 file = Arg::Global; 00152 } 00153 else if (struct_name.compare(0, 14, "struct.image3d") == 0) 00154 { 00155 kind = Arg::Image3D; 00156 file = Arg::Global; 00157 } 00158 } 00159 } 00160 else 00161 { 00162 if (arg_type->isVectorTy()) 00163 { 00164 // It's a vector, we need its element's type 00165 llvm::VectorType *v_type = llvm::cast<llvm::VectorType>(arg_type); 00166 00167 vec_dim = v_type->getNumElements(); 00168 arg_type = v_type->getElementType(); 00169 } 00170 00171 // Get type kind 00172 if (arg_type->isFloatTy()) 00173 { 00174 kind = Arg::Float; 00175 } 00176 else if (arg_type->isDoubleTy()) 00177 { 00178 kind = Arg::Double; 00179 } 00180 else if (arg_type->isIntegerTy()) 00181 { 00182 llvm::IntegerType *i_type = llvm::cast<llvm::IntegerType>(arg_type); 00183 00184 if (i_type->getBitWidth() == 8) 00185 { 00186 kind = Arg::Int8; 00187 } 00188 else if (i_type->getBitWidth() == 16) 00189 { 00190 kind = Arg::Int16; 00191 } 00192 else if (i_type->getBitWidth() == 32) 00193 { 00194 // NOTE: May also be a sampler, check done in setArg 00195 kind = Arg::Int32; 00196 } 00197 else if (i_type->getBitWidth() == 64) 00198 { 00199 kind = Arg::Int64; 00200 } 00201 } 00202 } 00203 00204 // Check if we recognized the type 00205 if (kind == Arg::Invalid) 00206 return CL_INVALID_KERNEL_DEFINITION; 00207 00208 // Create arg 00209 Arg a(vec_dim, file, kind); 00210 00211 // If we also have a function registered, check for signature compliance 00212 if (!append && a != p_args[i]) 00213 return CL_INVALID_KERNEL_DEFINITION; 00214 00215 // Append arg if needed 00216 if (append) 00217 p_args.push_back(a); 00218 } 00219 00220 dep.kernel = device->createDeviceKernel(this, dep.function); 00221 p_device_dependent.push_back(dep); 00222 00223 return CL_SUCCESS; 00224 } 00225 00226 llvm::Function *Kernel::function(DeviceInterface *device) const 00227 { 00228 const DeviceDependent &dep = deviceDependent(device); 00229 00230 return dep.function; 00231 } 00232 00233 cl_int Kernel::setArg(cl_uint index, size_t size, const void *value) 00234 { 00235 if (index > p_args.size()) 00236 return CL_INVALID_ARG_INDEX; 00237 00238 Arg &arg = p_args[index]; 00239 00240 // Special case for __local pointers 00241 if (arg.file() == Arg::Local) 00242 { 00243 if (size == 0) 00244 return CL_INVALID_ARG_SIZE; 00245 00246 if (value != 0) 00247 return CL_INVALID_ARG_VALUE; 00248 00249 arg.setAllocAtKernelRuntime(size); 00250 00251 return CL_SUCCESS; 00252 } 00253 00254 // Check that size corresponds to the arg type 00255 size_t arg_size = arg.valueSize(); 00256 00257 // Special case for samplers (pointers in C++, uint32 in OpenCL). 00258 if (size == sizeof(cl_sampler) && arg_size == 4 && 00259 (*(Object **)value)->isA(T_Sampler)) 00260 { 00261 unsigned int bitfield = (*(Sampler **)value)->bitfield(); 00262 00263 arg.refineKind(Arg::Sampler); 00264 arg.alloc(); 00265 arg.loadData(&bitfield); 00266 00267 return CL_SUCCESS; 00268 } 00269 00270 if (size != arg_size) 00271 return CL_INVALID_ARG_SIZE; 00272 00273 // Check for null values 00274 cl_mem null_mem = 0; 00275 00276 if (!value) 00277 { 00278 switch (arg.kind()) 00279 { 00280 case Arg::Buffer: 00281 case Arg::Image2D: 00282 case Arg::Image3D: 00283 // Special case buffers : value can be 0 (or point to 0) 00284 value = &null_mem; 00285 00286 default: 00287 return CL_INVALID_ARG_VALUE; 00288 } 00289 } 00290 00291 // Copy the data 00292 arg.alloc(); 00293 arg.loadData(value); 00294 00295 return CL_SUCCESS; 00296 } 00297 00298 unsigned int Kernel::numArgs() const 00299 { 00300 return p_args.size(); 00301 } 00302 00303 const Kernel::Arg &Kernel::arg(unsigned int index) const 00304 { 00305 return p_args.at(index); 00306 } 00307 00308 bool Kernel::argsSpecified() const 00309 { 00310 for (size_t i=0; i<p_args.size(); ++i) 00311 { 00312 if (!p_args[i].defined()) 00313 return false; 00314 } 00315 00316 return true; 00317 } 00318 00319 bool Kernel::hasLocals() const 00320 { 00321 return p_has_locals; 00322 } 00323 00324 DeviceKernel *Kernel::deviceDependentKernel(DeviceInterface *device) const 00325 { 00326 const DeviceDependent &dep = deviceDependent(device); 00327 00328 return dep.kernel; 00329 } 00330 00331 cl_int Kernel::info(cl_kernel_info param_name, 00332 size_t param_value_size, 00333 void *param_value, 00334 size_t *param_value_size_ret) const 00335 { 00336 void *value = 0; 00337 size_t value_length = 0; 00338 00339 union { 00340 cl_uint cl_uint_var; 00341 cl_program cl_program_var; 00342 cl_context cl_context_var; 00343 }; 00344 00345 switch (param_name) 00346 { 00347 case CL_KERNEL_FUNCTION_NAME: 00348 MEM_ASSIGN(p_name.size() + 1, p_name.c_str()); 00349 break; 00350 00351 case CL_KERNEL_NUM_ARGS: 00352 SIMPLE_ASSIGN(cl_uint, p_args.size()); 00353 break; 00354 00355 case CL_KERNEL_REFERENCE_COUNT: 00356 SIMPLE_ASSIGN(cl_uint, references()); 00357 break; 00358 00359 case CL_KERNEL_CONTEXT: 00360 SIMPLE_ASSIGN(cl_context, parent()->parent()); 00361 break; 00362 00363 case CL_KERNEL_PROGRAM: 00364 SIMPLE_ASSIGN(cl_program, parent()); 00365 break; 00366 00367 default: 00368 return CL_INVALID_VALUE; 00369 } 00370 00371 if (param_value && param_value_size < value_length) 00372 return CL_INVALID_VALUE; 00373 00374 if (param_value_size_ret) 00375 *param_value_size_ret = value_length; 00376 00377 if (param_value) 00378 std::memcpy(param_value, value, value_length); 00379 00380 return CL_SUCCESS; 00381 } 00382 00383 cl_int Kernel::workGroupInfo(DeviceInterface *device, 00384 cl_kernel_work_group_info param_name, 00385 size_t param_value_size, 00386 void *param_value, 00387 size_t *param_value_size_ret) const 00388 { 00389 void *value = 0; 00390 size_t value_length = 0; 00391 00392 union { 00393 size_t size_t_var; 00394 size_t three_size_t[3]; 00395 cl_ulong cl_ulong_var; 00396 }; 00397 00398 const DeviceDependent &dep = deviceDependent(device); 00399 00400 switch (param_name) 00401 { 00402 case CL_KERNEL_WORK_GROUP_SIZE: 00403 SIMPLE_ASSIGN(size_t, dep.kernel->workGroupSize()); 00404 break; 00405 00406 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: 00407 // TODO: Get this information from the kernel source 00408 three_size_t[0] = 0; 00409 three_size_t[1] = 0; 00410 three_size_t[2] = 0; 00411 value = &three_size_t; 00412 value_length = sizeof(three_size_t); 00413 break; 00414 00415 case CL_KERNEL_LOCAL_MEM_SIZE: 00416 SIMPLE_ASSIGN(cl_ulong, dep.kernel->localMemSize()); 00417 break; 00418 00419 case CL_KERNEL_PRIVATE_MEM_SIZE: 00420 SIMPLE_ASSIGN(cl_ulong, dep.kernel->privateMemSize()); 00421 break; 00422 00423 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: 00424 SIMPLE_ASSIGN(size_t, dep.kernel->preferredWorkGroupSizeMultiple()); 00425 break; 00426 00427 default: 00428 return CL_INVALID_VALUE; 00429 } 00430 00431 if (param_value && param_value_size < value_length) 00432 return CL_INVALID_VALUE; 00433 00434 if (param_value_size_ret) 00435 *param_value_size_ret = value_length; 00436 00437 if (param_value) 00438 std::memcpy(param_value, value, value_length); 00439 00440 return CL_SUCCESS; 00441 } 00442 00443 /* 00444 * Kernel::Arg 00445 */ 00446 Kernel::Arg::Arg(unsigned short vec_dim, File file, Kind kind) 00447 : p_vec_dim(vec_dim), p_file(file), p_kind(kind), p_data(0), p_defined(false), 00448 p_runtime_alloc(0) 00449 { 00450 00451 } 00452 00453 Kernel::Arg::~Arg() 00454 { 00455 if (p_data) 00456 std::free(p_data); 00457 } 00458 00459 void Kernel::Arg::alloc() 00460 { 00461 if (!p_data) 00462 p_data = std::malloc(p_vec_dim * valueSize()); 00463 } 00464 00465 void Kernel::Arg::loadData(const void *data) 00466 { 00467 std::memcpy(p_data, data, p_vec_dim * valueSize()); 00468 p_defined = true; 00469 } 00470 00471 void Kernel::Arg::setAllocAtKernelRuntime(size_t size) 00472 { 00473 p_runtime_alloc = size; 00474 p_defined = true; 00475 } 00476 00477 void Kernel::Arg::refineKind (Kernel::Arg::Kind kind) 00478 { 00479 p_kind = kind; 00480 } 00481 00482 bool Kernel::Arg::operator!=(const Arg &b) 00483 { 00484 bool same = (p_vec_dim == b.p_vec_dim) && 00485 (p_file == b.p_file) && 00486 (p_kind == b.p_kind); 00487 00488 return !same; 00489 } 00490 00491 size_t Kernel::Arg::valueSize() const 00492 { 00493 switch (p_kind) 00494 { 00495 case Invalid: 00496 return 0; 00497 case Int8: 00498 return 1; 00499 case Int16: 00500 return 2; 00501 case Int32: 00502 case Sampler: 00503 return 4; 00504 case Int64: 00505 return 8; 00506 case Float: 00507 return sizeof(cl_float); 00508 case Double: 00509 return sizeof(double); 00510 case Buffer: 00511 case Image2D: 00512 case Image3D: 00513 return sizeof(cl_mem); 00514 } 00515 00516 return 0; 00517 } 00518 00519 unsigned short Kernel::Arg::vecDim() const 00520 { 00521 return p_vec_dim; 00522 } 00523 00524 Kernel::Arg::File Kernel::Arg::file() const 00525 { 00526 return p_file; 00527 } 00528 00529 Kernel::Arg::Kind Kernel::Arg::kind() const 00530 { 00531 return p_kind; 00532 } 00533 00534 bool Kernel::Arg::defined() const 00535 { 00536 return p_defined; 00537 } 00538 00539 size_t Kernel::Arg::allocAtKernelRuntime() const 00540 { 00541 return p_runtime_alloc; 00542 } 00543 00544 const void *Kernel::Arg::value(unsigned short index) const 00545 { 00546 const char *data = (const char *)p_data; 00547 unsigned int offset = index * valueSize(); 00548 00549 data += offset; 00550 00551 return (const void *)data; 00552 } 00553 00554 const void *Kernel::Arg::data() const 00555 { 00556 return p_data; 00557 }