Clover Git
OpenCL 1.1 software implementation

program.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions are met:
00007  *     * Redistributions of source code must retain the above copyright
00008  *       notice, this list of conditions and the following disclaimer.
00009  *     * Redistributions in binary form must reproduce the above copyright
00010  *       notice, this list of conditions and the following disclaimer in the
00011  *       documentation and/or other materials provided with the distribution.
00012  *     * Neither the name of the copyright holder nor the
00013  *       names of its contributors may be used to endorse or promote products
00014  *       derived from this software without specific prior written permission.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00018  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00019  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
00020  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00021  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00022  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00023  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00024  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00025  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00026  */
00027 
00033 #include "program.h"
00034 #include "context.h"
00035 #include "compiler.h"
00036 #include "kernel.h"
00037 #include "propertylist.h"
00038 #include "deviceinterface.h"
00039 
00040 #include <string>
00041 #include <cstring>
00042 #include <cstdlib>
00043 #include <iostream>
00044 #include <vector>
00045 #include <set>
00046 #include <algorithm>
00047 
00048 #include <llvm/ADT/StringRef.h>
00049 #include <llvm/ADT/SmallVector.h>
00050 #include <llvm/Support/MemoryBuffer.h>
00051 #include <llvm/Support/raw_ostream.h>
00052 #include <llvm/Support/Casting.h>
00053 #include <llvm/Bitcode/ReaderWriter.h>
00054 #include <llvm/Transforms/IPO.h>
00055 #include <llvm/LLVMContext.h>
00056 #include <llvm/Module.h>
00057 #include <llvm/Linker.h>
00058 #include <llvm/PassManager.h>
00059 #include <llvm/Metadata.h>
00060 #include <llvm/Function.h>
00061 #include <llvm/Analysis/Passes.h>
00062 #include <llvm/Transforms/IPO.h>
00063 
00064 #include <runtime/stdlib.h.embed.h>
00065 #include <runtime/stdlib.c.bc.embed.h>
00066 
00067 using namespace Coal;
00068 
00069 Program::Program(Context *ctx)
00070 : Object(Object::T_Program, ctx), p_type(Invalid), p_state(Empty)
00071 {
00072     p_null_device_dependent.compiler = 0;
00073     p_null_device_dependent.device = 0;
00074     p_null_device_dependent.linked_module = 0;
00075     p_null_device_dependent.program = 0;
00076 }
00077 
00078 Program::~Program()
00079 {
00080     while (p_device_dependent.size())
00081     {
00082         DeviceDependent &dep = p_device_dependent.back();
00083 
00084         delete dep.compiler;
00085         delete dep.program;
00086         delete dep.linked_module;
00087 
00088         p_device_dependent.pop_back();
00089     }
00090 }
00091 
00092 void Program::setDevices(cl_uint num_devices, DeviceInterface * const*devices)
00093 {
00094     p_device_dependent.resize(num_devices);
00095 
00096     for (cl_uint i=0; i<num_devices; ++i)
00097     {
00098         DeviceDependent &dep = p_device_dependent[i];
00099 
00100         dep.device = devices[i];
00101         dep.program = dep.device->createDeviceProgram(this);
00102         dep.linked_module = 0;
00103         dep.compiler = new Compiler(dep.device);
00104     }
00105 }
00106 
00107 Program::DeviceDependent &Program::deviceDependent(DeviceInterface *device)
00108 {
00109     for (size_t i=0; i<p_device_dependent.size(); ++i)
00110     {
00111         DeviceDependent &rs = p_device_dependent[i];
00112 
00113         if (rs.device == device || (!device && p_device_dependent.size() == 1))
00114             return rs;
00115     }
00116 
00117     return p_null_device_dependent;
00118 }
00119 
00120 const Program::DeviceDependent &Program::deviceDependent(DeviceInterface *device) const
00121 {
00122     for (size_t i=0; i<p_device_dependent.size(); ++i)
00123     {
00124         const DeviceDependent &rs = p_device_dependent[i];
00125 
00126         if (rs.device == device || (!device && p_device_dependent.size() == 1))
00127             return rs;
00128     }
00129 
00130     return p_null_device_dependent;
00131 }
00132 
00133 DeviceProgram *Program::deviceDependentProgram(DeviceInterface *device) const
00134 {
00135     const DeviceDependent &dep = deviceDependent(device);
00136 
00137     return dep.program;
00138 }
00139 
00140 std::vector<llvm::Function *> Program::kernelFunctions(DeviceDependent &dep)
00141 {
00142     std::vector<llvm::Function *> rs;
00143 
00144     llvm::NamedMDNode *kernels = dep.linked_module->getNamedMetadata("opencl.kernels");
00145 
00146     if (!kernels)
00147         return rs;
00148 
00149     for (unsigned int i=0; i<kernels->getNumOperands(); ++i)
00150     {
00151         llvm::MDNode *node = kernels->getOperand(i);
00152 
00153         // Each node has only one operand : a llvm::Function
00154         llvm::Value *value = node->getOperand(0);
00155 
00156         if (!llvm::isa<llvm::Function>(value))
00157             continue;       // Bug somewhere, don't crash
00158 
00159         llvm::Function *f = llvm::cast<llvm::Function>(value);
00160         rs.push_back(f);
00161     }
00162 
00163     return rs;
00164 }
00165 
00166 Kernel *Program::createKernel(const std::string &name, cl_int *errcode_ret)
00167 {
00168     Kernel *rs = new Kernel(this);
00169 
00170     // Add a function definition for each device
00171     for (size_t i=0; i<p_device_dependent.size(); ++i)
00172     {
00173         bool found = false;
00174         DeviceDependent &dep = p_device_dependent[i];
00175         const std::vector<llvm::Function *> &kernels = kernelFunctions(dep);
00176 
00177         // Find the one with the good name
00178         for (size_t j=0; j<kernels.size(); ++j)
00179         {
00180             llvm::Function *func = kernels[j];
00181 
00182             if (func->getNameStr() == name)
00183             {
00184                 found = true;
00185                 *errcode_ret = rs->addFunction(dep.device, func, dep.linked_module);
00186 
00187                 if (*errcode_ret != CL_SUCCESS)
00188                     return rs;
00189 
00190                 break;
00191             }
00192         }
00193 
00194         if (!found)
00195         {
00196             // Kernel unavailable for this device
00197             *errcode_ret = CL_INVALID_KERNEL_NAME;
00198             return rs;
00199         }
00200     }
00201 
00202     return rs;
00203 }
00204 
00205 std::vector<Kernel *> Program::createKernels(cl_int *errcode_ret)
00206 {
00207     std::vector<Kernel *> rs;
00208 
00209     // We should never go here
00210     if (p_device_dependent.size() == 0)
00211         return rs;
00212 
00213     // Take the list of kernels for the first device dependent
00214     DeviceDependent &dep = p_device_dependent[0];
00215     const std::vector<llvm::Function *> &kernels = kernelFunctions(dep);
00216 
00217     // Create the kernel for each function name
00218     // It returns an error if the signature is not the same for every device
00219     // or if the kernel isn't found on all the devices.
00220     for (size_t i=0; i<kernels.size(); ++i)
00221     {
00222         cl_int result = CL_SUCCESS;
00223         Kernel *kernel = createKernel(kernels[i]->getNameStr(), &result);
00224 
00225         if (result == CL_SUCCESS)
00226         {
00227             rs.push_back(kernel);
00228         }
00229         else
00230         {
00231             delete kernel;
00232         }
00233     }
00234 
00235     return rs;
00236 }
00237 
00238 cl_int Program::loadSources(cl_uint count, const char **strings,
00239                             const size_t *lengths)
00240 {
00241     p_source = std::string(embed_stdlib_h);
00242 
00243     // Merge all strings into one big one
00244     for (cl_uint i=0; i<count; ++i)
00245     {
00246         size_t len = 0;
00247         const char *data = strings[i];
00248 
00249         if (!data)
00250             return CL_INVALID_VALUE;
00251 
00252         // Get the length of the source
00253         if (lengths && lengths[i])
00254             len = lengths[i];
00255         else
00256             len = std::strlen(data);
00257 
00258         // Remove trailing \0's, it's not good for sources (it can arise when
00259         // the client application wrongly sets lengths
00260         while (len > 0 && data[len-1] == 0)
00261             len--;
00262 
00263         // Merge the string
00264         std::string part(data, len);
00265         p_source += part;
00266     }
00267 
00268     p_type = Source;
00269     p_state = Loaded;
00270 
00271     return CL_SUCCESS;
00272 }
00273 
00274 cl_int Program::loadBinaries(const unsigned char **data, const size_t *lengths,
00275                              cl_int *binary_status, cl_uint num_devices,
00276                              DeviceInterface * const*device_list)
00277 {
00278     // Set device infos
00279     setDevices(num_devices, device_list);
00280 
00281     // Load the data
00282     for (cl_uint i=0; i<num_devices; ++i)
00283     {
00284         DeviceDependent &dep = deviceDependent(device_list[i]);
00285 
00286         // Load bitcode
00287         dep.unlinked_binary = std::string((const char *)data[i], lengths[i]);
00288 
00289         // Make a module of it
00290         const llvm::StringRef s_data(dep.unlinked_binary);
00291         const llvm::StringRef s_name("<binary>");
00292 
00293         llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(s_data,
00294                                                                       s_name,
00295                                                                       false);
00296 
00297         if (!buffer)
00298             return CL_OUT_OF_HOST_MEMORY;
00299 
00300         dep.linked_module = ParseBitcodeFile(buffer, llvm::getGlobalContext());
00301 
00302         if (!dep.linked_module)
00303         {
00304             binary_status[i] = CL_INVALID_VALUE;
00305             return CL_INVALID_BINARY;
00306         }
00307 
00308         binary_status[i] = CL_SUCCESS;
00309     }
00310 
00311     p_type = Binary;
00312     p_state = Loaded;
00313 
00314     return CL_SUCCESS;
00315 }
00316 
00317 cl_int Program::build(const char *options,
00318                       void (CL_CALLBACK *pfn_notify)(cl_program program,
00319                                                      void *user_data),
00320                       void *user_data, cl_uint num_devices,
00321                       DeviceInterface * const*device_list)
00322 {
00323     p_state = Failed;
00324 
00325     // Set device infos
00326     if (!p_device_dependent.size())
00327     {
00328         setDevices(num_devices, device_list);
00329     }
00330 
00331     for (cl_uint i=0; i<p_device_dependent.size(); ++i)
00332     {
00333         DeviceDependent &dep = deviceDependent(device_list[i]);
00334 
00335         // Do we need to compile the source for each device ?
00336         if (p_type == Source)
00337         {
00338             // Load source
00339             const llvm::StringRef s_data(p_source);
00340             const llvm::StringRef s_name("<source>");
00341 
00342             llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(s_data,
00343                                                                         s_name);
00344 
00345             // Compile
00346             if (!dep.compiler->compile(options ? options : std::string(), buffer))
00347             {
00348                 if (pfn_notify)
00349                     pfn_notify((cl_program)this, user_data);
00350 
00351                 return CL_BUILD_PROGRAM_FAILURE;
00352             }
00353 
00354             // Get module and its bitcode
00355             dep.linked_module = dep.compiler->module();
00356 
00357             llvm::raw_string_ostream ostream(dep.unlinked_binary);
00358             llvm::WriteBitcodeToFile(dep.linked_module, ostream);
00359             ostream.flush();
00360         }
00361 
00362         // Link p_linked_module with the stdlib if the device needs that
00363         if (dep.program->linkStdLib())
00364         {
00365             // Load the stdlib bitcode
00366             const llvm::StringRef s_data(embed_stdlib_c_bc,
00367                                          sizeof(embed_stdlib_c_bc) - 1);
00368             const llvm::StringRef s_name("stdlib.bc");
00369             std::string errMsg;
00370 
00371             llvm::MemoryBuffer *buffer = llvm::MemoryBuffer::getMemBuffer(s_data,
00372                                                                           s_name,
00373                                                                           false);
00374 
00375             if (!buffer)
00376                 return CL_OUT_OF_HOST_MEMORY;
00377 
00378             llvm::Module *stdlib = ParseBitcodeFile(buffer,
00379                                                     llvm::getGlobalContext(),
00380                                                     &errMsg);
00381 
00382             // Link
00383             if (!stdlib ||
00384                 llvm::Linker::LinkModules(dep.linked_module, stdlib, &errMsg))
00385             {
00386                 dep.compiler->appendLog("link error: ");
00387                 dep.compiler->appendLog(errMsg);
00388                 dep.compiler->appendLog("\n");
00389 
00390                 // DEBUG
00391                 std::cout << dep.compiler->log() << std::endl;
00392 
00393                 if (pfn_notify)
00394                     pfn_notify((cl_program)this, user_data);
00395 
00396                 return CL_BUILD_PROGRAM_FAILURE;
00397             }
00398         }
00399 
00400         // Get list of kernels to strip other unused functions
00401         std::vector<const char *> api;
00402         std::vector<std::string> api_s;     // Needed to keep valid data in api
00403         const std::vector<llvm::Function *> &kernels = kernelFunctions(dep);
00404 
00405         for (size_t j=0; j<kernels.size(); ++j)
00406         {
00407             std::string s = kernels[j]->getNameStr();
00408 
00409             api_s.push_back(s);
00410             api.push_back(s.c_str());
00411         }
00412 
00413         // Optimize code
00414         llvm::PassManager *manager = new llvm::PassManager();
00415 
00416         // Common passes (primary goal : remove unused stdlib functions)
00417         manager->add(llvm::createTypeBasedAliasAnalysisPass());
00418         manager->add(llvm::createBasicAliasAnalysisPass());
00419         manager->add(llvm::createInternalizePass(api));
00420         manager->add(llvm::createIPSCCPPass());
00421         manager->add(llvm::createGlobalOptimizerPass());
00422         manager->add(llvm::createConstantMergePass());
00423 
00424         dep.program->createOptimizationPasses(manager, dep.compiler->optimize());
00425 
00426         manager->add(llvm::createGlobalDCEPass());
00427 
00428         manager->run(*dep.linked_module);
00429         delete manager;
00430 
00431         // Now that the LLVM module is built, build the device-specific
00432         // representation
00433         if (!dep.program->build(dep.linked_module))
00434         {
00435             if (pfn_notify)
00436                 pfn_notify((cl_program)this, user_data);
00437 
00438             return CL_BUILD_PROGRAM_FAILURE;
00439         }
00440     }
00441 
00442     // TODO: Asynchronous compile
00443     if (pfn_notify)
00444         pfn_notify((cl_program)this, user_data);
00445 
00446     p_state = Built;
00447 
00448     return CL_SUCCESS;
00449 }
00450 
00451 Program::Type Program::type() const
00452 {
00453     return p_type;
00454 }
00455 
00456 Program::State Program::state() const
00457 {
00458     return p_state;
00459 }
00460 
00461 cl_int Program::info(cl_program_info param_name,
00462                      size_t param_value_size,
00463                      void *param_value,
00464                      size_t *param_value_size_ret) const
00465 {
00466     void *value = 0;
00467     size_t value_length = 0;
00468     llvm::SmallVector<size_t, 4> binary_sizes;
00469     llvm::SmallVector<DeviceInterface *, 4> devices;
00470 
00471     union {
00472         cl_uint cl_uint_var;
00473         cl_context cl_context_var;
00474     };
00475 
00476     switch (param_name)
00477     {
00478         case CL_PROGRAM_REFERENCE_COUNT:
00479             SIMPLE_ASSIGN(cl_uint, references());
00480             break;
00481 
00482         case CL_PROGRAM_NUM_DEVICES:
00483             SIMPLE_ASSIGN(cl_uint, p_device_dependent.size());
00484             break;
00485 
00486         case CL_PROGRAM_DEVICES:
00487             for (size_t i=0; i<p_device_dependent.size(); ++i)
00488             {
00489                 const DeviceDependent &dep = p_device_dependent[i];
00490 
00491                 devices.push_back(dep.device);
00492             }
00493 
00494             value = devices.data();
00495             value_length = devices.size() * sizeof(DeviceInterface *);
00496             break;
00497 
00498         case CL_PROGRAM_CONTEXT:
00499             SIMPLE_ASSIGN(cl_context, parent());
00500             break;
00501 
00502         case CL_PROGRAM_SOURCE:
00503             MEM_ASSIGN(p_source.size() + 1, p_source.c_str());
00504             break;
00505 
00506         case CL_PROGRAM_BINARY_SIZES:
00507             for (size_t i=0; i<p_device_dependent.size(); ++i)
00508             {
00509                 const DeviceDependent &dep = p_device_dependent[i];
00510 
00511                 binary_sizes.push_back(dep.unlinked_binary.size());
00512             }
00513 
00514             value = binary_sizes.data();
00515             value_length = binary_sizes.size() * sizeof(size_t);
00516             break;
00517 
00518         case CL_PROGRAM_BINARIES:
00519         {
00520             // Special case : param_value points to an array of p_num_devices
00521             // application-allocated unsigned char* pointers. Check it's good
00522             // and std::memcpy the data
00523 
00524             unsigned char **binaries = (unsigned char **)param_value;
00525             value_length = p_device_dependent.size() * sizeof(unsigned char *);
00526 
00527             if (!param_value || param_value_size < value_length)
00528                 return CL_INVALID_VALUE;
00529 
00530             for (size_t i=0; i<p_device_dependent.size(); ++i)
00531             {
00532                 const DeviceDependent &dep = p_device_dependent[i];
00533                 unsigned char *dest = binaries[i];
00534 
00535                 if (!dest)
00536                     continue;
00537 
00538                 std::memcpy(dest, dep.unlinked_binary.data(),
00539                             dep.unlinked_binary.size());
00540             }
00541 
00542             if (param_value_size_ret)
00543                 *param_value_size_ret = value_length;
00544 
00545             return CL_SUCCESS;
00546         }
00547 
00548         default:
00549             return CL_INVALID_VALUE;
00550     }
00551 
00552     if (param_value && param_value_size < value_length)
00553         return CL_INVALID_VALUE;
00554 
00555     if (param_value_size_ret)
00556         *param_value_size_ret = value_length;
00557 
00558     if (param_value)
00559         std::memcpy(param_value, value, value_length);
00560 
00561     return CL_SUCCESS;
00562 }
00563 
00564 cl_int Program::buildInfo(DeviceInterface *device,
00565                           cl_program_build_info param_name,
00566                           size_t param_value_size,
00567                           void *param_value,
00568                           size_t *param_value_size_ret) const
00569 {
00570     const void *value = 0;
00571     size_t value_length = 0;
00572     const DeviceDependent &dep = deviceDependent(device);
00573 
00574     union {
00575         cl_build_status cl_build_status_var;
00576     };
00577 
00578     switch (param_name)
00579     {
00580         case CL_PROGRAM_BUILD_STATUS:
00581             switch (p_state)
00582             {
00583                 case Empty:
00584                 case Loaded:
00585                     SIMPLE_ASSIGN(cl_build_status, CL_BUILD_NONE);
00586                     break;
00587                 case Built:
00588                     SIMPLE_ASSIGN(cl_build_status, CL_BUILD_SUCCESS);
00589                     break;
00590                 case Failed:
00591                     SIMPLE_ASSIGN(cl_build_status, CL_BUILD_ERROR);
00592                     break;
00593                 // TODO: CL_BUILD_IN_PROGRESS
00594             }
00595             break;
00596 
00597         case CL_PROGRAM_BUILD_OPTIONS:
00598             value = dep.compiler->options().c_str();
00599             value_length = dep.compiler->options().size() + 1;
00600             break;
00601 
00602         case CL_PROGRAM_BUILD_LOG:
00603             value = dep.compiler->log().c_str();
00604             value_length = dep.compiler->log().size() + 1;
00605             break;
00606 
00607         default:
00608             return CL_INVALID_VALUE;
00609     }
00610 
00611     if (param_value && param_value_size < value_length)
00612         return CL_INVALID_VALUE;
00613 
00614     if (param_value_size_ret)
00615         *param_value_size_ret = value_length;
00616 
00617     if (param_value)
00618         std::memcpy(param_value, value, value_length);
00619 
00620     return CL_SUCCESS;
00621 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines