Clover Git
OpenCL 1.1 software implementation
|
00001 /* 00002 * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr> 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions are met: 00007 * * Redistributions of source code must retain the above copyright 00008 * notice, this list of conditions and the following disclaimer. 00009 * * Redistributions in binary form must reproduce the above copyright 00010 * notice, this list of conditions and the following disclaimer in the 00011 * documentation and/or other materials provided with the distribution. 00012 * * Neither the name of the copyright holder nor the 00013 * names of its contributors may be used to endorse or promote products 00014 * derived from this software without specific prior written permission. 00015 * 00016 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 00017 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00018 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00019 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 00020 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00021 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00022 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00023 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00024 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00025 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00026 */ 00027 00033 #include "program.h" 00034 #include "context.h" 00035 #include "compiler.h" 00036 #include "kernel.h" 00037 #include "propertylist.h" 00038 #include "deviceinterface.h" 00039 00040 #include <string> 00041 #include <cstring> 00042 #include <cstdlib> 00043 #include <iostream> 00044 #include <vector> 00045 #include <set> 00046 #include <algorithm> 00047 00048 #include <llvm/ADT/StringRef.h> 00049 #include <llvm/ADT/SmallVector.h> 00050 #include <llvm/Support/MemoryBuffer.h> 00051 #include <llvm/Support/raw_ostream.h> 00052 #include <llvm/Support/Casting.h> 00053 #include <llvm/Bitcode/ReaderWriter.h> 00054 #include <llvm/Transforms/IPO.h> 00055 #include <llvm/LLVMContext.h> 00056 #include <llvm/Module.h> 00057 #include <llvm/Linker.h> 00058 #include <llvm/PassManager.h> 00059 #include <llvm/Metadata.h> 00060 #include <llvm/Function.h> 00061 #include <llvm/Analysis/Passes.h> 00062 #include <llvm/Transforms/IPO.h> 00063 00064 #include <runtime/stdlib.h.embed.h> 00065 #include <runtime/stdlib.c.bc.embed.h> 00066 00067 using namespace Coal; 00068 00069 Program::Program(Context *ctx) 00070 : Object(Object::T_Program, ctx), p_type(Invalid), p_state(Empty) 00071 { 00072 p_null_device_dependent.compiler = 0; 00073 p_null_device_dependent.device = 0; 00074 p_null_device_dependent.linked_module = 0; 00075 p_null_device_dependent.program = 0; 00076 } 00077 00078 Program::~Program() 00079 { 00080 while (p_device_dependent.size()) 00081 { 00082 DeviceDependent &dep = p_device_dependent.back(); 00083 00084 delete dep.compiler; 00085 delete dep.program; 00086 delete dep.linked_module; 00087 00088 p_device_dependent.pop_back(); 00089 } 00090 } 00091 00092 void Program::setDevices(cl_uint num_devices, DeviceInterface * const*devices) 00093 { 00094 p_device_dependent.resize(num_devices); 00095 00096 for (cl_uint i=0; i<num_devices; ++i) 00097 { 00098 DeviceDependent &dep = p_device_dependent[i]; 00099 00100 dep.device = devices[i]; 00101 dep.program = dep.device->createDeviceProgram(this); 00102 dep.linked_module = 0; 00103 dep.compiler = new Compiler(dep.device); 00104 } 00105 } 00106 00107 Program::DeviceDependent &Program::deviceDependent(DeviceInterface *device) 00108 { 00109 for (size_t i=0; i<p_device_dependent.size(); ++i) 00110 { 00111 DeviceDependent &rs = p_device_dependent[i]; 00112 00113 if (rs.device == device || (!device && p_device_dependent.size() == 1)) 00114 return rs; 00115 } 00116 00117 return p_null_device_dependent; 00118 } 00119 00120 const Program::DeviceDependent &Program::deviceDependent(DeviceInterface *device) const 00121 { 00122 for (size_t i=0; i<p_device_dependent.size(); ++i) 00123 { 00124 const DeviceDependent &rs = p_device_dependent[i]; 00125 00126 if (rs.device == device || (!device && p_device_dependent.size() == 1)) 00127 return rs; 00128 } 00129 00130 return p_null_device_dependent; 00131 } 00132 00133 DeviceProgram *Program::deviceDependentProgram(DeviceInterface *device) const 00134 { 00135 const DeviceDependent &dep = deviceDependent(device); 00136 00137 return dep.program; 00138 } 00139 00140 std::vector<llvm::Function *> Program::kernelFunctions(DeviceDependent &dep) 00141 { 00142 std::vector<llvm::Function *> rs; 00143 00144 llvm::NamedMDNode *kernels = dep.linked_module->getNamedMetadata("opencl.kernels"); 00145 00146 if (!kernels) 00147 return rs; 00148 00149 for (unsigned int i=0; i<kernels->getNumOperands(); ++i) 00150 { 00151 llvm::MDNode *node = kernels->getOperand(i); 00152 00153 // Each node has only one operand : a llvm::Function 00154 llvm::Value *value = node->getOperand(0); 00155 00156 if (!llvm::isa<llvm::Function>(value)) 00157 continue; // Bug somewhere, don't crash 00158 00159 llvm::Function *f = llvm::cast<llvm::Function>(value); 00160 rs.push_back(f); 00161 } 00162 00163 return rs; 00164 } 00165 00166 Kernel *Program::createKernel(const std::string &name, cl_int *errcode_ret) 00167 { 00168 Kernel *rs = new Kernel(this); 00169 00170 // Add a function definition for each device 00171 for (size_t i=0; i<p_device_dependent.size(); ++i) 00172 { 00173 bool found = false; 00174 DeviceDependent &dep = p_device_dependent[i]; 00175 const std::vector<llvm::Function *> &kernels = kernelFunctions(dep); 00176 00177 // Find the one with the good name 00178 for (size_t j=0; j<kernels.size(); ++j) 00179 { 00180 llvm::Function *func = kernels[j]; 00181 00182 if (func->getNameStr() == name) 00183 { 00184 found = true; 00185 *errcode_ret = rs->addFunction(dep.device, func, dep.linked_module); 00186 00187 if (*errcode_ret != CL_SUCCESS) 00188 return rs; 00189 00190 break; 00191 } 00192 } 00193 00194 if (!found) 00195 { 00196 // Kernel unavailable for this device 00197 *errcode_ret = CL_INVALID_KERNEL_NAME; 00198 return rs; 00199 } 00200 } 00201 00202 return rs; 00203 } 00204 00205 std::vector<Kernel *> Program::createKernels(cl_int *errcode_ret) 00206 { 00207 std::vector<Kernel *> rs; 00208 00209 // We should never go here 00210 if (p_device_dependent.size() == 0) 00211 return rs; 00212 00213 // Take the list of kernels for the first device dependent 00214 DeviceDependent &dep = p_device_dependent[0]; 00215 const std::vector<llvm::Function *> &kernels = kernelFunctions(dep); 00216 00217 // Create the kernel for each function name 00218 // It returns an error if the signature is not the same for every device 00219 // or if the kernel isn't found on all the devices. 00220 for (size_t i=0; i<kernels.size(); ++i) 00221 { 00222 cl_int result = CL_SUCCESS; 00223 Kernel *kernel = createKernel(kernels[i]->getNameStr(), &result); 00224 00225 if (result == CL_SUCCESS) 00226 { 00227 rs.push_back(kernel); 00228 } 00229 else 00230 { 00231 delete kernel; 00232 } 00233 } 00234 00235 return rs; 00236 } 00237 00238 cl_int Program::loadSources(cl_uint count, const char **strings, 00239 const size_t *lengths) 00240 { 00241 p_source = std::string(embed_stdlib_h); 00242 00243 // Merge all strings into one big one 00244 for (cl_uint i=0; i<count; ++i) 00245 { 00246 size_t len = 0; 00247 const char *data = strings[i]; 00248 00249 if (!data) 00250 return CL_INVALID_VALUE; 00251 00252 // Get the length of the source 00253 if (lengths && lengths[i]) 00254 len = lengths[i]; 00255 else 00256 len = std::strlen(data); 00257 00258 // Remove trailing \0's, it's not good for sources (it can arise when 00259 // the client application wrongly sets lengths 00260 while (len > 0 && data[len-1] == 0) 00261 len--; 00262 00263 // Merge the string 00264 std::string part(data, len); 00265 p_source += part; 00266 } 00267 00268 p_type = Source; 00269 p_state = Loaded; 00270 00271 return CL_SUCCESS; 00272 } 00273 00274 cl_int Program::loadBinaries(const unsigned char **data, const size_t *lengths, 00275 cl_int *binary_status, cl_uint num_devices, 00276 DeviceInterface * const*device_list) 00277 { 00278 // Set device infos 00279 setDevices(num_devices, device_list); 00280 00281 // Load the data 00282 for (cl_uint i=0; i<num_devices; ++i) 00283 { 00284 DeviceDependent &dep = deviceDependent(device_list[i]); 00285 00286 // Load bitcode 00287 dep.unlinked_binary = std::string((const char *)data[i], lengths[i]); 00288 00289 // Make a module of it 00290 const llvm::StringRef s_data(dep.unlinked_binary); 00291 const llvm::StringRef s_name("<binary>"); 00292 00293 llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(s_data, 00294 s_name, 00295 false); 00296 00297 if (!buffer) 00298 return CL_OUT_OF_HOST_MEMORY; 00299 00300 dep.linked_module = ParseBitcodeFile(buffer, llvm::getGlobalContext()); 00301 00302 if (!dep.linked_module) 00303 { 00304 binary_status[i] = CL_INVALID_VALUE; 00305 return CL_INVALID_BINARY; 00306 } 00307 00308 binary_status[i] = CL_SUCCESS; 00309 } 00310 00311 p_type = Binary; 00312 p_state = Loaded; 00313 00314 return CL_SUCCESS; 00315 } 00316 00317 cl_int Program::build(const char *options, 00318 void (CL_CALLBACK *pfn_notify)(cl_program program, 00319 void *user_data), 00320 void *user_data, cl_uint num_devices, 00321 DeviceInterface * const*device_list) 00322 { 00323 p_state = Failed; 00324 00325 // Set device infos 00326 if (!p_device_dependent.size()) 00327 { 00328 setDevices(num_devices, device_list); 00329 } 00330 00331 for (cl_uint i=0; i<p_device_dependent.size(); ++i) 00332 { 00333 DeviceDependent &dep = deviceDependent(device_list[i]); 00334 00335 // Do we need to compile the source for each device ? 00336 if (p_type == Source) 00337 { 00338 // Load source 00339 const llvm::StringRef s_data(p_source); 00340 const llvm::StringRef s_name("<source>"); 00341 00342 llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(s_data, 00343 s_name); 00344 00345 // Compile 00346 if (!dep.compiler->compile(options ? options : std::string(), buffer)) 00347 { 00348 if (pfn_notify) 00349 pfn_notify((cl_program)this, user_data); 00350 00351 return CL_BUILD_PROGRAM_FAILURE; 00352 } 00353 00354 // Get module and its bitcode 00355 dep.linked_module = dep.compiler->module(); 00356 00357 llvm::raw_string_ostream ostream(dep.unlinked_binary); 00358 llvm::WriteBitcodeToFile(dep.linked_module, ostream); 00359 ostream.flush(); 00360 } 00361 00362 // Link p_linked_module with the stdlib if the device needs that 00363 if (dep.program->linkStdLib()) 00364 { 00365 // Load the stdlib bitcode 00366 const llvm::StringRef s_data(embed_stdlib_c_bc, 00367 sizeof(embed_stdlib_c_bc) - 1); 00368 const llvm::StringRef s_name("stdlib.bc"); 00369 std::string errMsg; 00370 00371 llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(s_data, 00372 s_name, 00373 false); 00374 00375 if (!buffer) 00376 return CL_OUT_OF_HOST_MEMORY; 00377 00378 llvm::Module *stdlib = ParseBitcodeFile(buffer, 00379 llvm::getGlobalContext(), 00380 &errMsg); 00381 00382 // Link 00383 if (!stdlib || 00384 llvm::Linker::LinkModules(dep.linked_module, stdlib, &errMsg)) 00385 { 00386 dep.compiler->appendLog("link error: "); 00387 dep.compiler->appendLog(errMsg); 00388 dep.compiler->appendLog("\n"); 00389 00390 // DEBUG 00391 std::cout << dep.compiler->log() << std::endl; 00392 00393 if (pfn_notify) 00394 pfn_notify((cl_program)this, user_data); 00395 00396 return CL_BUILD_PROGRAM_FAILURE; 00397 } 00398 } 00399 00400 // Get list of kernels to strip other unused functions 00401 std::vector<const char *> api; 00402 std::vector<std::string> api_s; // Needed to keep valid data in api 00403 const std::vector<llvm::Function *> &kernels = kernelFunctions(dep); 00404 00405 for (size_t j=0; j<kernels.size(); ++j) 00406 { 00407 std::string s = kernels[j]->getNameStr(); 00408 00409 api_s.push_back(s); 00410 api.push_back(s.c_str()); 00411 } 00412 00413 // Optimize code 00414 llvm::PassManager *manager = new llvm::PassManager(); 00415 00416 // Common passes (primary goal : remove unused stdlib functions) 00417 manager->add(llvm::createTypeBasedAliasAnalysisPass()); 00418 manager->add(llvm::createBasicAliasAnalysisPass()); 00419 manager->add(llvm::createInternalizePass(api)); 00420 manager->add(llvm::createIPSCCPPass()); 00421 manager->add(llvm::createGlobalOptimizerPass()); 00422 manager->add(llvm::createConstantMergePass()); 00423 00424 dep.program->createOptimizationPasses(manager, dep.compiler->optimize()); 00425 00426 manager->add(llvm::createGlobalDCEPass()); 00427 00428 manager->run(*dep.linked_module); 00429 delete manager; 00430 00431 // Now that the LLVM module is built, build the device-specific 00432 // representation 00433 if (!dep.program->build(dep.linked_module)) 00434 { 00435 if (pfn_notify) 00436 pfn_notify((cl_program)this, user_data); 00437 00438 return CL_BUILD_PROGRAM_FAILURE; 00439 } 00440 } 00441 00442 // TODO: Asynchronous compile 00443 if (pfn_notify) 00444 pfn_notify((cl_program)this, user_data); 00445 00446 p_state = Built; 00447 00448 return CL_SUCCESS; 00449 } 00450 00451 Program::Type Program::type() const 00452 { 00453 return p_type; 00454 } 00455 00456 Program::State Program::state() const 00457 { 00458 return p_state; 00459 } 00460 00461 cl_int Program::info(cl_program_info param_name, 00462 size_t param_value_size, 00463 void *param_value, 00464 size_t *param_value_size_ret) const 00465 { 00466 void *value = 0; 00467 size_t value_length = 0; 00468 llvm::SmallVector<size_t, 4> binary_sizes; 00469 llvm::SmallVector<DeviceInterface *, 4> devices; 00470 00471 union { 00472 cl_uint cl_uint_var; 00473 cl_context cl_context_var; 00474 }; 00475 00476 switch (param_name) 00477 { 00478 case CL_PROGRAM_REFERENCE_COUNT: 00479 SIMPLE_ASSIGN(cl_uint, references()); 00480 break; 00481 00482 case CL_PROGRAM_NUM_DEVICES: 00483 SIMPLE_ASSIGN(cl_uint, p_device_dependent.size()); 00484 break; 00485 00486 case CL_PROGRAM_DEVICES: 00487 for (size_t i=0; i<p_device_dependent.size(); ++i) 00488 { 00489 const DeviceDependent &dep = p_device_dependent[i]; 00490 00491 devices.push_back(dep.device); 00492 } 00493 00494 value = devices.data(); 00495 value_length = devices.size() * sizeof(DeviceInterface *); 00496 break; 00497 00498 case CL_PROGRAM_CONTEXT: 00499 SIMPLE_ASSIGN(cl_context, parent()); 00500 break; 00501 00502 case CL_PROGRAM_SOURCE: 00503 MEM_ASSIGN(p_source.size() + 1, p_source.c_str()); 00504 break; 00505 00506 case CL_PROGRAM_BINARY_SIZES: 00507 for (size_t i=0; i<p_device_dependent.size(); ++i) 00508 { 00509 const DeviceDependent &dep = p_device_dependent[i]; 00510 00511 binary_sizes.push_back(dep.unlinked_binary.size()); 00512 } 00513 00514 value = binary_sizes.data(); 00515 value_length = binary_sizes.size() * sizeof(size_t); 00516 break; 00517 00518 case CL_PROGRAM_BINARIES: 00519 { 00520 // Special case : param_value points to an array of p_num_devices 00521 // application-allocated unsigned char* pointers. Check it's good 00522 // and std::memcpy the data 00523 00524 unsigned char **binaries = (unsigned char **)param_value; 00525 value_length = p_device_dependent.size() * sizeof(unsigned char *); 00526 00527 if (!param_value || param_value_size < value_length) 00528 return CL_INVALID_VALUE; 00529 00530 for (size_t i=0; i<p_device_dependent.size(); ++i) 00531 { 00532 const DeviceDependent &dep = p_device_dependent[i]; 00533 unsigned char *dest = binaries[i]; 00534 00535 if (!dest) 00536 continue; 00537 00538 std::memcpy(dest, dep.unlinked_binary.data(), 00539 dep.unlinked_binary.size()); 00540 } 00541 00542 if (param_value_size_ret) 00543 *param_value_size_ret = value_length; 00544 00545 return CL_SUCCESS; 00546 } 00547 00548 default: 00549 return CL_INVALID_VALUE; 00550 } 00551 00552 if (param_value && param_value_size < value_length) 00553 return CL_INVALID_VALUE; 00554 00555 if (param_value_size_ret) 00556 *param_value_size_ret = value_length; 00557 00558 if (param_value) 00559 std::memcpy(param_value, value, value_length); 00560 00561 return CL_SUCCESS; 00562 } 00563 00564 cl_int Program::buildInfo(DeviceInterface *device, 00565 cl_program_build_info param_name, 00566 size_t param_value_size, 00567 void *param_value, 00568 size_t *param_value_size_ret) const 00569 { 00570 const void *value = 0; 00571 size_t value_length = 0; 00572 const DeviceDependent &dep = deviceDependent(device); 00573 00574 union { 00575 cl_build_status cl_build_status_var; 00576 }; 00577 00578 switch (param_name) 00579 { 00580 case CL_PROGRAM_BUILD_STATUS: 00581 switch (p_state) 00582 { 00583 case Empty: 00584 case Loaded: 00585 SIMPLE_ASSIGN(cl_build_status, CL_BUILD_NONE); 00586 break; 00587 case Built: 00588 SIMPLE_ASSIGN(cl_build_status, CL_BUILD_SUCCESS); 00589 break; 00590 case Failed: 00591 SIMPLE_ASSIGN(cl_build_status, CL_BUILD_ERROR); 00592 break; 00593 // TODO: CL_BUILD_IN_PROGRESS 00594 } 00595 break; 00596 00597 case CL_PROGRAM_BUILD_OPTIONS: 00598 value = dep.compiler->options().c_str(); 00599 value_length = dep.compiler->options().size() + 1; 00600 break; 00601 00602 case CL_PROGRAM_BUILD_LOG: 00603 value = dep.compiler->log().c_str(); 00604 value_length = dep.compiler->log().size() + 1; 00605 break; 00606 00607 default: 00608 return CL_INVALID_VALUE; 00609 } 00610 00611 if (param_value && param_value_size < value_length) 00612 return CL_INVALID_VALUE; 00613 00614 if (param_value_size_ret) 00615 *param_value_size_ret = value_length; 00616 00617 if (param_value) 00618 std::memcpy(param_value, value, value_length); 00619 00620 return CL_SUCCESS; 00621 }