Clover Git
OpenCL 1.1 software implementation

device.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions are met:
00007  *     * Redistributions of source code must retain the above copyright
00008  *       notice, this list of conditions and the following disclaimer.
00009  *     * Redistributions in binary form must reproduce the above copyright
00010  *       notice, this list of conditions and the following disclaimer in the
00011  *       documentation and/or other materials provided with the distribution.
00012  *     * Neither the name of the copyright holder nor the
00013  *       names of its contributors may be used to endorse or promote products
00014  *       derived from this software without specific prior written permission.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00018  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00019  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
00020  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00021  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00022  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00023  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00024  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00025  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00026  */
00027 
00033 #include "device.h"
00034 #include "buffer.h"
00035 #include "kernel.h"
00036 #include "program.h"
00037 #include "worker.h"
00038 #include "builtins.h"
00039 
00040 #include <core/config.h>
00041 #include "../propertylist.h"
00042 #include "../commandqueue.h"
00043 #include "../events.h"
00044 #include "../memobject.h"
00045 #include "../kernel.h"
00046 #include "../program.h"
00047 
00048 #include <cstring>
00049 #include <cstdlib>
00050 #include <unistd.h>
00051 
00052 #include <iostream>
00053 #include <fstream>
00054 #include <sstream>
00055 
00056 using namespace Coal;
00057 
00058 CPUDevice::CPUDevice()
00059 : DeviceInterface(), p_cores(0), p_num_events(0), p_workers(0), p_stop(false),
00060   p_initialized(false)
00061 {
00062 
00063 }
00064 
00065 void CPUDevice::init()
00066 {
00067     if (p_initialized)
00068         return;
00069 
00070     // Initialize the locking machinery
00071     pthread_cond_init(&p_events_cond, 0);
00072     pthread_mutex_init(&p_events_mutex, 0);
00073 
00074     // Get info about the system
00075     p_cores = sysconf(_SC_NPROCESSORS_ONLN);
00076     p_cpu_mhz = 0.0f;
00077 
00078     std::filebuf fb;
00079     fb.open("/proc/cpuinfo", std::ios::in);
00080     std::istream is(&fb);
00081 
00082     while (!is.eof())
00083     {
00084         std::string key, value;
00085 
00086         std::getline(is, key, ':');
00087         is.ignore(1);
00088         std::getline(is, value);
00089 
00090         if (key.compare(0, 7, "cpu MHz") == 0)
00091         {
00092             std::istringstream ss(value);
00093             ss >> p_cpu_mhz;
00094             break;
00095         }
00096     }
00097 
00098     // Create worker threads
00099     p_workers = (pthread_t *)std::malloc(numCPUs() * sizeof(pthread_t));
00100 
00101     for (unsigned int i=0; i<numCPUs(); ++i)
00102     {
00103         pthread_create(&p_workers[i], 0, &worker, this);
00104     }
00105 
00106     p_initialized = true;
00107 }
00108 
00109 CPUDevice::~CPUDevice()
00110 {
00111     if (!p_initialized)
00112         return;
00113 
00114     // Terminate the workers and wait for them
00115     pthread_mutex_lock(&p_events_mutex);
00116 
00117     p_stop = true;
00118 
00119     pthread_cond_broadcast(&p_events_cond);
00120     pthread_mutex_unlock(&p_events_mutex);
00121 
00122     for (unsigned int i=0; i<numCPUs(); ++i)
00123     {
00124         pthread_join(p_workers[i], 0);
00125     }
00126 
00127     // Free allocated memory
00128     std::free((void *)p_workers);
00129     pthread_mutex_destroy(&p_events_mutex);
00130     pthread_cond_destroy(&p_events_cond);
00131 }
00132 
00133 DeviceBuffer *CPUDevice::createDeviceBuffer(MemObject *buffer, cl_int *rs)
00134 {
00135     return (DeviceBuffer *)new CPUBuffer(this, buffer, rs);
00136 }
00137 
00138 DeviceProgram *CPUDevice::createDeviceProgram(Program *program)
00139 {
00140     return (DeviceProgram *)new CPUProgram(this, program);
00141 }
00142 
00143 DeviceKernel *CPUDevice::createDeviceKernel(Kernel *kernel,
00144                                             llvm::Function *function)
00145 {
00146     return (DeviceKernel *)new CPUKernel(this, kernel, function);
00147 }
00148 
00149 cl_int CPUDevice::initEventDeviceData(Event *event)
00150 {
00151     switch (event->type())
00152     {
00153         case Event::MapBuffer:
00154         {
00155             MapBufferEvent *e = (MapBufferEvent *)event;
00156             CPUBuffer *buf = (CPUBuffer *)e->buffer()->deviceBuffer(this);
00157             unsigned char *data = (unsigned char *)buf->data();
00158 
00159             data += e->offset();
00160 
00161             e->setPtr((void *)data);
00162             break;
00163         }
00164         case Event::MapImage:
00165         {
00166             MapImageEvent *e = (MapImageEvent *)event;
00167             Image2D *image = (Image2D *)e->buffer();
00168             CPUBuffer *buf = (CPUBuffer *)image->deviceBuffer(this);
00169             unsigned char *data = (unsigned char *)buf->data();
00170 
00171             data = imageData(data,
00172                              e->origin(0),
00173                              e->origin(1),
00174                              e->origin(2),
00175                              image->row_pitch(),
00176                              image->slice_pitch(),
00177                              image->pixel_size());
00178 
00179             e->setPtr((void *)data);
00180             e->setRowPitch(image->row_pitch());
00181             e->setSlicePitch(image->slice_pitch());
00182             break;
00183         }
00184         case Event::UnmapMemObject:
00185             // Nothing do to
00186             break;
00187 
00188         case Event::NDRangeKernel:
00189         case Event::TaskKernel:
00190         {
00191             // Instantiate the JIT for the CPU program
00192             KernelEvent *e = (KernelEvent *)event;
00193             Program *p = (Program *)e->kernel()->parent();
00194             CPUProgram *prog = (CPUProgram *)p->deviceDependentProgram(this);
00195 
00196             if (!prog->initJIT())
00197                 return CL_INVALID_PROGRAM_EXECUTABLE;
00198 
00199             // Set device-specific data
00200             CPUKernelEvent *cpu_e = new CPUKernelEvent(this, e);
00201             e->setDeviceData((void *)cpu_e);
00202 
00203             break;
00204         }
00205         default:
00206             break;
00207     }
00208 
00209     return CL_SUCCESS;
00210 }
00211 
00212 void CPUDevice::freeEventDeviceData(Event *event)
00213 {
00214     switch (event->type())
00215     {
00216         case Event::NDRangeKernel:
00217         case Event::TaskKernel:
00218         {
00219             CPUKernelEvent *cpu_e = (CPUKernelEvent *)event->deviceData();
00220 
00221             if (cpu_e)
00222                 delete cpu_e;
00223         }
00224         default:
00225             break;
00226     }
00227 }
00228 
00229 void CPUDevice::pushEvent(Event *event)
00230 {
00231     // Add an event in the list
00232     pthread_mutex_lock(&p_events_mutex);
00233 
00234     p_events.push_back(event);
00235     p_num_events++;                 // Way faster than STL list::size() !
00236 
00237     pthread_cond_broadcast(&p_events_cond);
00238     pthread_mutex_unlock(&p_events_mutex);
00239 }
00240 
00241 Event *CPUDevice::getEvent(bool &stop)
00242 {
00243     // Return the first event in the list, if any. Remove it if it is a
00244     // single-shot event.
00245     pthread_mutex_lock(&p_events_mutex);
00246 
00247     while (p_num_events == 0 && !p_stop)
00248         pthread_cond_wait(&p_events_cond, &p_events_mutex);
00249 
00250     if (p_stop)
00251     {
00252         pthread_mutex_unlock(&p_events_mutex);
00253         stop = true;
00254         return 0;
00255     }
00256 
00257     Event *event = p_events.front();
00258 
00259     // If the run of this event will finish it, remove it from the list
00260     bool last_slot = true;
00261 
00262     if (event->type() == Event::NDRangeKernel ||
00263         event->type() == Event::TaskKernel)
00264     {
00265         CPUKernelEvent *ke = (CPUKernelEvent *)event->deviceData();
00266         last_slot = ke->reserve();
00267     }
00268 
00269     if (last_slot)
00270     {
00271         p_num_events--;
00272         p_events.pop_front();
00273     }
00274 
00275     pthread_mutex_unlock(&p_events_mutex);
00276 
00277     return event;
00278 }
00279 
00280 unsigned int CPUDevice::numCPUs() const
00281 {
00282     return p_cores;
00283 }
00284 
00285 float CPUDevice::cpuMhz() const
00286 {
00287     return p_cpu_mhz;
00288 }
00289 
// Largest value that fits in an unsigned integer type.
//
// The value is assembled from the top bit of the type and from all the bits
// below it. Example with an 8-bit type:
//
// sizeof * 8 - 1 => 7
// 1 << $         => 10000000   (top bit alone)
// $ - 1          => 01111111   (every bit below the top one)
// sum of the two => 11111111
//
// The shorter (1 << (sizeof(type) * 8)) - 1 is not used because it overflows
// the type (for int8, 1 << 8 = 100000000 = 256 > 255)
#define TYPE_MAX(type) \
    ((((type)1 << ((sizeof(type) * 8) - 1)) - 1) + \
     ((type)1 << ((sizeof(type) * 8) - 1)))
00302 
00303 cl_int CPUDevice::info(cl_device_info param_name,
00304                        size_t param_value_size,
00305                        void *param_value,
00306                        size_t *param_value_size_ret) const
00307 {
00308     void *value = 0;
00309     size_t value_length = 0;
00310 
00311     union {
00312         cl_device_type cl_device_type_var;
00313         cl_uint cl_uint_var;
00314         size_t size_t_var;
00315         cl_ulong cl_ulong_var;
00316         cl_bool cl_bool_var;
00317         cl_device_fp_config cl_device_fp_config_var;
00318         cl_device_mem_cache_type cl_device_mem_cache_type_var;
00319         cl_device_local_mem_type cl_device_local_mem_type_var;
00320         cl_device_exec_capabilities cl_device_exec_capabilities_var;
00321         cl_command_queue_properties cl_command_queue_properties_var;
00322         cl_platform_id cl_platform_id_var;
00323         size_t work_dims[MAX_WORK_DIMS];
00324     };
00325 
00326     switch (param_name)
00327     {
00328         case CL_DEVICE_TYPE:
00329             SIMPLE_ASSIGN(cl_device_type, CL_DEVICE_TYPE_CPU);
00330             break;
00331 
00332         case CL_DEVICE_VENDOR_ID:
00333             SIMPLE_ASSIGN(cl_uint, 0);
00334             break;
00335 
00336         case CL_DEVICE_MAX_COMPUTE_UNITS:
00337             SIMPLE_ASSIGN(cl_uint, numCPUs());
00338             break;
00339 
00340         case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
00341             SIMPLE_ASSIGN(cl_uint, MAX_WORK_DIMS);
00342             break;
00343 
00344         case CL_DEVICE_MAX_WORK_GROUP_SIZE:
00345             SIMPLE_ASSIGN(size_t, TYPE_MAX(size_t));
00346             break;
00347 
00348         case CL_DEVICE_MAX_WORK_ITEM_SIZES:
00349             for (int i=0; i<MAX_WORK_DIMS; ++i)
00350             {
00351                 work_dims[i] = TYPE_MAX(size_t);
00352             }
00353             value_length = MAX_WORK_DIMS * sizeof(size_t);
00354             value = &work_dims;
00355             break;
00356 
00357         case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
00358             SIMPLE_ASSIGN(cl_uint, 16);
00359             break;
00360 
00361         case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
00362             SIMPLE_ASSIGN(cl_uint, 8);
00363             break;
00364 
00365         case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
00366             SIMPLE_ASSIGN(cl_uint, 4);
00367             break;
00368 
00369         case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
00370             SIMPLE_ASSIGN(cl_uint, 2);
00371             break;
00372 
00373         case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
00374             SIMPLE_ASSIGN(cl_uint, 4);
00375             break;
00376 
00377         case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
00378             SIMPLE_ASSIGN(cl_uint, 2);
00379             break;
00380 
00381         case CL_DEVICE_MAX_CLOCK_FREQUENCY:
00382             SIMPLE_ASSIGN(cl_uint, cpuMhz() * 1000000);
00383             break;
00384 
00385         case CL_DEVICE_ADDRESS_BITS:
00386             SIMPLE_ASSIGN(cl_uint, 32);
00387             break;
00388 
00389         case CL_DEVICE_MAX_READ_IMAGE_ARGS:
00390             SIMPLE_ASSIGN(cl_uint, 65536);
00391             break;
00392 
00393         case CL_DEVICE_MAX_WRITE_IMAGE_ARGS:
00394             SIMPLE_ASSIGN(cl_uint, 65536);
00395             break;
00396 
00397         case CL_DEVICE_MAX_MEM_ALLOC_SIZE:
00398             SIMPLE_ASSIGN(cl_ulong, 128*1024*1024);
00399             break;
00400 
00401         case CL_DEVICE_IMAGE2D_MAX_WIDTH:
00402             SIMPLE_ASSIGN(size_t, 65536);
00403             break;
00404 
00405         case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
00406             SIMPLE_ASSIGN(size_t, 65536);
00407             break;
00408 
00409         case CL_DEVICE_IMAGE3D_MAX_WIDTH:
00410             SIMPLE_ASSIGN(size_t, 65536);
00411             break;
00412 
00413         case CL_DEVICE_IMAGE3D_MAX_HEIGHT:
00414             SIMPLE_ASSIGN(size_t, 65536);
00415             break;
00416 
00417         case CL_DEVICE_IMAGE3D_MAX_DEPTH:
00418             SIMPLE_ASSIGN(size_t, 65536);
00419             break;
00420 
00421         case CL_DEVICE_IMAGE_SUPPORT:
00422             SIMPLE_ASSIGN(cl_bool, CL_TRUE);
00423             break;
00424 
00425         case CL_DEVICE_MAX_PARAMETER_SIZE:
00426             SIMPLE_ASSIGN(size_t, 65536);
00427             break;
00428 
00429         case CL_DEVICE_MAX_SAMPLERS:
00430             SIMPLE_ASSIGN(cl_uint, 16);
00431             break;
00432 
00433         case CL_DEVICE_MEM_BASE_ADDR_ALIGN:
00434             SIMPLE_ASSIGN(cl_uint, 0);
00435             break;
00436 
00437         case CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE:
00438             SIMPLE_ASSIGN(cl_uint, 16);
00439             break;
00440 
00441         case CL_DEVICE_SINGLE_FP_CONFIG:
00442             // TODO: Check what an x86 SSE engine can support.
00443             SIMPLE_ASSIGN(cl_device_fp_config,
00444                           CL_FP_DENORM |
00445                           CL_FP_INF_NAN |
00446                           CL_FP_ROUND_TO_NEAREST);
00447             break;
00448 
00449         case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE:
00450             SIMPLE_ASSIGN(cl_device_mem_cache_type,
00451                           CL_READ_WRITE_CACHE);
00452             break;
00453 
00454         case CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE:
00455             // TODO: Get this information from the processor
00456             SIMPLE_ASSIGN(cl_uint, 16);
00457             break;
00458 
00459         case CL_DEVICE_GLOBAL_MEM_CACHE_SIZE:
00460             // TODO: Get this information from the processor
00461             SIMPLE_ASSIGN(cl_ulong, 512*1024*1024);
00462             break;
00463 
00464         case CL_DEVICE_GLOBAL_MEM_SIZE:
00465             // TODO: 1 Gio seems to be enough for software acceleration
00466             SIMPLE_ASSIGN(cl_ulong, 1*1024*1024*1024);
00467             break;
00468 
00469         case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:
00470             SIMPLE_ASSIGN(cl_ulong, 1*1024*1024*1024);
00471             break;
00472 
00473         case CL_DEVICE_MAX_CONSTANT_ARGS:
00474             SIMPLE_ASSIGN(cl_uint, 65536);
00475             break;
00476 
00477         case CL_DEVICE_LOCAL_MEM_TYPE:
00478             SIMPLE_ASSIGN(cl_device_local_mem_type, CL_GLOBAL);
00479             break;
00480 
00481         case CL_DEVICE_LOCAL_MEM_SIZE:
00482             SIMPLE_ASSIGN(cl_ulong, 1*1024*1024*1024);
00483             break;
00484 
00485         case CL_DEVICE_ERROR_CORRECTION_SUPPORT:
00486             SIMPLE_ASSIGN(cl_bool, CL_FALSE);
00487             break;
00488 
00489         case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
00490             // TODO
00491             SIMPLE_ASSIGN(size_t, 1000);        // 1000 nanoseconds = 1 ms
00492             break;
00493 
00494         case CL_DEVICE_ENDIAN_LITTLE:
00495             SIMPLE_ASSIGN(cl_bool, CL_TRUE);
00496             break;
00497 
00498         case CL_DEVICE_AVAILABLE:
00499             SIMPLE_ASSIGN(cl_bool, CL_TRUE);
00500             break;
00501 
00502         case CL_DEVICE_COMPILER_AVAILABLE:
00503             SIMPLE_ASSIGN(cl_bool, CL_TRUE);
00504             break;
00505 
00506         case CL_DEVICE_EXECUTION_CAPABILITIES:
00507             SIMPLE_ASSIGN(cl_device_exec_capabilities, CL_EXEC_KERNEL |
00508                           CL_EXEC_NATIVE_KERNEL);
00509             break;
00510 
00511         case CL_DEVICE_QUEUE_PROPERTIES:
00512             SIMPLE_ASSIGN(cl_command_queue_properties,
00513                           CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
00514                           CL_QUEUE_PROFILING_ENABLE);
00515             break;
00516 
00517         case CL_DEVICE_NAME:
00518             STRING_ASSIGN("CPU");
00519             break;
00520 
00521         case CL_DEVICE_VENDOR:
00522             STRING_ASSIGN("Mesa");
00523             break;
00524 
00525         case CL_DRIVER_VERSION:
00526             STRING_ASSIGN("" COAL_VERSION);
00527             break;
00528 
00529         case CL_DEVICE_PROFILE:
00530             STRING_ASSIGN("FULL_PROFILE");
00531             break;
00532 
00533         case CL_DEVICE_VERSION:
00534             STRING_ASSIGN("OpenCL 1.1 Mesa " COAL_VERSION);
00535             break;
00536 
00537         case CL_DEVICE_EXTENSIONS:
00538             STRING_ASSIGN("cl_khr_global_int32_base_atomics"
00539                           " cl_khr_global_int32_extended_atomics"
00540                           " cl_khr_local_int32_base_atomics"
00541                           " cl_khr_local_int32_extended_atomics"
00542                           " cl_khr_byte_addressable_store"
00543 
00544                           " cl_khr_fp64"
00545                           " cl_khr_int64_base_atomics"
00546                           " cl_khr_int64_extended_atomics")
00547 
00548             break;
00549 
00550         case CL_DEVICE_PLATFORM:
00551             SIMPLE_ASSIGN(cl_platform_id, 0);
00552             break;
00553 
00554         case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
00555             SIMPLE_ASSIGN(cl_uint, 0);
00556             break;
00557 
00558         case CL_DEVICE_HOST_UNIFIED_MEMORY:
00559             SIMPLE_ASSIGN(cl_bool, CL_TRUE);
00560             break;
00561 
00562         case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
00563             SIMPLE_ASSIGN(cl_uint, 16);
00564             break;
00565 
00566         case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
00567             SIMPLE_ASSIGN(cl_uint, 8);
00568             break;
00569 
00570         case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
00571             SIMPLE_ASSIGN(cl_uint, 4);
00572             break;
00573 
00574         case CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG:
00575             SIMPLE_ASSIGN(cl_uint, 2);
00576             break;
00577 
00578         case CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT:
00579             SIMPLE_ASSIGN(cl_uint, 4);
00580             break;
00581 
00582         case CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE:
00583             SIMPLE_ASSIGN(cl_uint, 2);
00584             break;
00585 
00586         case CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF:
00587             SIMPLE_ASSIGN(cl_uint, 0);
00588             break;
00589 
00590         case CL_DEVICE_OPENCL_C_VERSION:
00591             STRING_ASSIGN("OpenCL C 1.1 LLVM " LLVM_VERSION);
00592             break;
00593 
00594         default:
00595             return CL_INVALID_VALUE;
00596     }
00597 
00598     if (param_value && param_value_size < value_length)
00599         return CL_INVALID_VALUE;
00600 
00601     if (param_value_size_ret)
00602         *param_value_size_ret = value_length;
00603 
00604     if (param_value)
00605         std::memcpy(param_value, value, value_length);
00606 
00607     return CL_SUCCESS;
00608 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines