Clover Git
OpenCL 1.1 software implementation

kernel.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions are met:
00007  *     * Redistributions of source code must retain the above copyright
00008  *       notice, this list of conditions and the following disclaimer.
00009  *     * Redistributions in binary form must reproduce the above copyright
00010  *       notice, this list of conditions and the following disclaimer in the
00011  *       documentation and/or other materials provided with the distribution.
00012  *     * Neither the name of the copyright holder nor the
00013  *       names of its contributors may be used to endorse or promote products
00014  *       derived from this software without specific prior written permission.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00018  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00019  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
00020  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00021  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00022  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00023  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00024  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00025  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00026  */
00027 
00033 #include "kernel.h"
00034 #include "propertylist.h"
00035 #include "program.h"
00036 #include "memobject.h"
00037 #include "sampler.h"
00038 #include "deviceinterface.h"
00039 
00040 #include <string>
00041 #include <iostream>
00042 #include <cstring>
00043 #include <cstdlib>
00044 
00045 #include <llvm/Support/Casting.h>
00046 #include <llvm/Module.h>
00047 #include <llvm/Type.h>
00048 #include <llvm/DerivedTypes.h>
00049 
00050 using namespace Coal;
00051 Kernel::Kernel(Program *program)
00052 : Object(Object::T_Kernel, program), p_has_locals(false)
00053 {
00054     // TODO: Say a kernel is attached to the program (that becomes unalterable)
00055 
00056     null_dep.device = 0;
00057     null_dep.kernel = 0;
00058     null_dep.function = 0;
00059     null_dep.module = 0;
00060 }
00061 
00062 Kernel::~Kernel()
00063 {
00064     while (p_device_dependent.size())
00065     {
00066         DeviceDependent &dep = p_device_dependent.back();
00067 
00068         delete dep.kernel;
00069 
00070         p_device_dependent.pop_back();
00071     }
00072 }
00073 
00074 const Kernel::DeviceDependent &Kernel::deviceDependent(DeviceInterface *device) const
00075 {
00076     for (size_t i=0; i<p_device_dependent.size(); ++i)
00077     {
00078         const DeviceDependent &rs = p_device_dependent[i];
00079 
00080         if (rs.device == device || (!device && p_device_dependent.size() == 1))
00081             return rs;
00082     }
00083 
00084     return null_dep;
00085 }
00086 
00087 Kernel::DeviceDependent &Kernel::deviceDependent(DeviceInterface *device)
00088 {
00089     for (size_t i=0; i<p_device_dependent.size(); ++i)
00090     {
00091         DeviceDependent &rs = p_device_dependent[i];
00092 
00093         if (rs.device == device || (!device && p_device_dependent.size() == 1))
00094             return rs;
00095     }
00096 
00097     return null_dep;
00098 }
00099 
00100 cl_int Kernel::addFunction(DeviceInterface *device, llvm::Function *function,
00101                            llvm::Module *module)
00102 {
00103     p_name = function->getNameStr();
00104 
00105     // Add a device dependent
00106     DeviceDependent dep;
00107 
00108     dep.device = device;
00109     dep.function = function;
00110     dep.module = module;
00111 
00112     // Build the arg list of the kernel (or verify it if a previous function
00113     // was already registered)
00114     llvm::FunctionType *f = function->getFunctionType();
00115     bool append = (p_args.size() == 0);
00116 
00117     if (!append && p_args.size() != f->getNumParams())
00118         return CL_INVALID_KERNEL_DEFINITION;
00119 
00120     for (unsigned int i=0; i<f->getNumParams(); ++i)
00121     {
00122         llvm::Type *arg_type = f->getParamType(i);
00123         Arg::Kind kind = Arg::Invalid;
00124         Arg::File file = Arg::Private;
00125         unsigned short vec_dim = 1;
00126 
00127         if (arg_type->isPointerTy())
00128         {
00129             // It's a pointer, dereference it
00130             llvm::PointerType *p_type = llvm::cast<llvm::PointerType>(arg_type);
00131 
00132             file = (Arg::File)p_type->getAddressSpace();
00133             arg_type = p_type->getElementType();
00134 
00135             // If it's a __local argument, we'll have to allocate memory at run time
00136             if (file == Arg::Local)
00137                 p_has_locals = true;
00138 
00139             kind = Arg::Buffer;
00140 
00141             // If it's a struct, get its name
00142             if (arg_type->isStructTy())
00143             {
00144                 llvm::StructType *struct_type =
00145                     llvm::cast<llvm::StructType>(arg_type);
00146                 std::string struct_name = struct_type->getName().str();
00147 
00148                 if (struct_name.compare(0, 14, "struct.image2d") == 0)
00149                 {
00150                     kind = Arg::Image2D;
00151                     file = Arg::Global;
00152                 }
00153                 else if (struct_name.compare(0, 14, "struct.image3d") == 0)
00154                 {
00155                     kind = Arg::Image3D;
00156                     file = Arg::Global;
00157                 }
00158             }
00159         }
00160         else
00161         {
00162             if (arg_type->isVectorTy())
00163             {
00164                 // It's a vector, we need its element's type
00165                 llvm::VectorType *v_type = llvm::cast<llvm::VectorType>(arg_type);
00166 
00167                 vec_dim = v_type->getNumElements();
00168                 arg_type = v_type->getElementType();
00169             }
00170 
00171             // Get type kind
00172             if (arg_type->isFloatTy())
00173             {
00174                 kind = Arg::Float;
00175             }
00176             else if (arg_type->isDoubleTy())
00177             {
00178                 kind = Arg::Double;
00179             }
00180             else if (arg_type->isIntegerTy())
00181             {
00182                 llvm::IntegerType *i_type = llvm::cast<llvm::IntegerType>(arg_type);
00183 
00184                 if (i_type->getBitWidth() == 8)
00185                 {
00186                     kind = Arg::Int8;
00187                 }
00188                 else if (i_type->getBitWidth() == 16)
00189                 {
00190                     kind = Arg::Int16;
00191                 }
00192                 else if (i_type->getBitWidth() == 32)
00193                 {
00194                     // NOTE: May also be a sampler, check done in setArg
00195                     kind = Arg::Int32;
00196                 }
00197                 else if (i_type->getBitWidth() == 64)
00198                 {
00199                     kind = Arg::Int64;
00200                 }
00201             }
00202         }
00203 
00204         // Check if we recognized the type
00205         if (kind == Arg::Invalid)
00206             return CL_INVALID_KERNEL_DEFINITION;
00207 
00208         // Create arg
00209         Arg a(vec_dim, file, kind);
00210 
00211         // If we also have a function registered, check for signature compliance
00212         if (!append && a != p_args[i])
00213             return CL_INVALID_KERNEL_DEFINITION;
00214 
00215         // Append arg if needed
00216         if (append)
00217             p_args.push_back(a);
00218     }
00219 
00220     dep.kernel = device->createDeviceKernel(this, dep.function);
00221     p_device_dependent.push_back(dep);
00222 
00223     return CL_SUCCESS;
00224 }
00225 
00226 llvm::Function *Kernel::function(DeviceInterface *device) const
00227 {
00228     const DeviceDependent &dep = deviceDependent(device);
00229 
00230     return dep.function;
00231 }
00232 
00233 cl_int Kernel::setArg(cl_uint index, size_t size, const void *value)
00234 {
00235     if (index > p_args.size())
00236         return CL_INVALID_ARG_INDEX;
00237 
00238     Arg &arg = p_args[index];
00239 
00240     // Special case for __local pointers
00241     if (arg.file() == Arg::Local)
00242     {
00243         if (size == 0)
00244             return CL_INVALID_ARG_SIZE;
00245 
00246         if (value != 0)
00247             return CL_INVALID_ARG_VALUE;
00248 
00249         arg.setAllocAtKernelRuntime(size);
00250 
00251         return CL_SUCCESS;
00252     }
00253 
00254     // Check that size corresponds to the arg type
00255     size_t arg_size = arg.valueSize();
00256 
00257     // Special case for samplers (pointers in C++, uint32 in OpenCL).
00258     if (size == sizeof(cl_sampler) && arg_size == 4 &&
00259         (*(Object **)value)->isA(T_Sampler))
00260     {
00261         unsigned int bitfield = (*(Sampler **)value)->bitfield();
00262 
00263         arg.refineKind(Arg::Sampler);
00264         arg.alloc();
00265         arg.loadData(&bitfield);
00266 
00267         return CL_SUCCESS;
00268     }
00269 
00270     if (size != arg_size)
00271         return CL_INVALID_ARG_SIZE;
00272 
00273     // Check for null values
00274     cl_mem null_mem = 0;
00275 
00276     if (!value)
00277     {
00278         switch (arg.kind())
00279         {
00280             case Arg::Buffer:
00281             case Arg::Image2D:
00282             case Arg::Image3D:
00283                 // Special case buffers : value can be 0 (or point to 0)
00284                 value = &null_mem;
00285 
00286             default:
00287                 return CL_INVALID_ARG_VALUE;
00288         }
00289     }
00290 
00291     // Copy the data
00292     arg.alloc();
00293     arg.loadData(value);
00294 
00295     return CL_SUCCESS;
00296 }
00297 
00298 unsigned int Kernel::numArgs() const
00299 {
00300     return p_args.size();
00301 }
00302 
00303 const Kernel::Arg &Kernel::arg(unsigned int index) const
00304 {
00305     return p_args.at(index);
00306 }
00307 
00308 bool Kernel::argsSpecified() const
00309 {
00310     for (size_t i=0; i<p_args.size(); ++i)
00311     {
00312         if (!p_args[i].defined())
00313             return false;
00314     }
00315 
00316     return true;
00317 }
00318 
00319 bool Kernel::hasLocals() const
00320 {
00321     return p_has_locals;
00322 }
00323 
00324 DeviceKernel *Kernel::deviceDependentKernel(DeviceInterface *device) const
00325 {
00326     const DeviceDependent &dep = deviceDependent(device);
00327 
00328     return dep.kernel;
00329 }
00330 
00331 cl_int Kernel::info(cl_kernel_info param_name,
00332                     size_t param_value_size,
00333                     void *param_value,
00334                     size_t *param_value_size_ret) const
00335 {
00336     void *value = 0;
00337     size_t value_length = 0;
00338 
00339     union {
00340         cl_uint cl_uint_var;
00341         cl_program cl_program_var;
00342         cl_context cl_context_var;
00343     };
00344 
00345     switch (param_name)
00346     {
00347         case CL_KERNEL_FUNCTION_NAME:
00348             MEM_ASSIGN(p_name.size() + 1, p_name.c_str());
00349             break;
00350 
00351         case CL_KERNEL_NUM_ARGS:
00352             SIMPLE_ASSIGN(cl_uint, p_args.size());
00353             break;
00354 
00355         case CL_KERNEL_REFERENCE_COUNT:
00356             SIMPLE_ASSIGN(cl_uint, references());
00357             break;
00358 
00359         case CL_KERNEL_CONTEXT:
00360             SIMPLE_ASSIGN(cl_context, parent()->parent());
00361             break;
00362 
00363         case CL_KERNEL_PROGRAM:
00364             SIMPLE_ASSIGN(cl_program, parent());
00365             break;
00366 
00367         default:
00368             return CL_INVALID_VALUE;
00369     }
00370 
00371     if (param_value && param_value_size < value_length)
00372         return CL_INVALID_VALUE;
00373 
00374     if (param_value_size_ret)
00375         *param_value_size_ret = value_length;
00376 
00377     if (param_value)
00378         std::memcpy(param_value, value, value_length);
00379 
00380     return CL_SUCCESS;
00381 }
00382 
00383 cl_int Kernel::workGroupInfo(DeviceInterface *device,
00384                              cl_kernel_work_group_info param_name,
00385                              size_t param_value_size,
00386                              void *param_value,
00387                              size_t *param_value_size_ret) const
00388 {
00389     void *value = 0;
00390     size_t value_length = 0;
00391 
00392     union {
00393         size_t size_t_var;
00394         size_t three_size_t[3];
00395         cl_ulong cl_ulong_var;
00396     };
00397 
00398     const DeviceDependent &dep = deviceDependent(device);
00399 
00400     switch (param_name)
00401     {
00402         case CL_KERNEL_WORK_GROUP_SIZE:
00403             SIMPLE_ASSIGN(size_t, dep.kernel->workGroupSize());
00404             break;
00405 
00406         case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
00407             // TODO: Get this information from the kernel source
00408             three_size_t[0] = 0;
00409             three_size_t[1] = 0;
00410             three_size_t[2] = 0;
00411             value = &three_size_t;
00412             value_length = sizeof(three_size_t);
00413             break;
00414 
00415         case CL_KERNEL_LOCAL_MEM_SIZE:
00416             SIMPLE_ASSIGN(cl_ulong, dep.kernel->localMemSize());
00417             break;
00418 
00419         case CL_KERNEL_PRIVATE_MEM_SIZE:
00420             SIMPLE_ASSIGN(cl_ulong, dep.kernel->privateMemSize());
00421             break;
00422 
00423         case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
00424             SIMPLE_ASSIGN(size_t, dep.kernel->preferredWorkGroupSizeMultiple());
00425             break;
00426 
00427         default:
00428             return CL_INVALID_VALUE;
00429     }
00430 
00431     if (param_value && param_value_size < value_length)
00432         return CL_INVALID_VALUE;
00433 
00434     if (param_value_size_ret)
00435         *param_value_size_ret = value_length;
00436 
00437     if (param_value)
00438         std::memcpy(param_value, value, value_length);
00439 
00440     return CL_SUCCESS;
00441 }
00442 
00443 /*
00444  * Kernel::Arg
00445  */
00446 Kernel::Arg::Arg(unsigned short vec_dim, File file, Kind kind)
00447 : p_vec_dim(vec_dim), p_file(file), p_kind(kind), p_data(0), p_defined(false),
00448   p_runtime_alloc(0)
00449 {
00450 
00451 }
00452 
00453 Kernel::Arg::~Arg()
00454 {
00455     if (p_data)
00456         std::free(p_data);
00457 }
00458 
00459 void Kernel::Arg::alloc()
00460 {
00461     if (!p_data)
00462         p_data = std::malloc(p_vec_dim * valueSize());
00463 }
00464 
00465 void Kernel::Arg::loadData(const void *data)
00466 {
00467     std::memcpy(p_data, data, p_vec_dim * valueSize());
00468     p_defined = true;
00469 }
00470 
00471 void Kernel::Arg::setAllocAtKernelRuntime(size_t size)
00472 {
00473     p_runtime_alloc = size;
00474     p_defined = true;
00475 }
00476 
00477 void Kernel::Arg::refineKind (Kernel::Arg::Kind kind)
00478 {
00479     p_kind = kind;
00480 }
00481 
00482 bool Kernel::Arg::operator!=(const Arg &b)
00483 {
00484     bool same = (p_vec_dim == b.p_vec_dim) &&
00485                 (p_file == b.p_file) &&
00486                 (p_kind == b.p_kind);
00487 
00488     return !same;
00489 }
00490 
00491 size_t Kernel::Arg::valueSize() const
00492 {
00493     switch (p_kind)
00494     {
00495         case Invalid:
00496             return 0;
00497         case Int8:
00498             return 1;
00499         case Int16:
00500             return 2;
00501         case Int32:
00502         case Sampler:
00503             return 4;
00504         case Int64:
00505             return 8;
00506         case Float:
00507             return sizeof(cl_float);
00508         case Double:
00509             return sizeof(double);
00510         case Buffer:
00511         case Image2D:
00512         case Image3D:
00513             return sizeof(cl_mem);
00514     }
00515 
00516     return 0;
00517 }
00518 
00519 unsigned short Kernel::Arg::vecDim() const
00520 {
00521     return p_vec_dim;
00522 }
00523 
00524 Kernel::Arg::File Kernel::Arg::file() const
00525 {
00526     return p_file;
00527 }
00528 
00529 Kernel::Arg::Kind Kernel::Arg::kind() const
00530 {
00531     return p_kind;
00532 }
00533 
00534 bool Kernel::Arg::defined() const
00535 {
00536     return p_defined;
00537 }
00538 
00539 size_t Kernel::Arg::allocAtKernelRuntime() const
00540 {
00541     return p_runtime_alloc;
00542 }
00543 
00544 const void *Kernel::Arg::value(unsigned short index) const
00545 {
00546     const char *data = (const char *)p_data;
00547     unsigned int offset = index * valueSize();
00548 
00549     data += offset;
00550 
00551     return (const void *)data;
00552 }
00553 
00554 const void *Kernel::Arg::data() const
00555 {
00556     return p_data;
00557 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines