Clover Git
OpenCL 1.1 software implementation

kernel.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr>
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions are met:
00007  *     * Redistributions of source code must retain the above copyright
00008  *       notice, this list of conditions and the following disclaimer.
00009  *     * Redistributions in binary form must reproduce the above copyright
00010  *       notice, this list of conditions and the following disclaimer in the
00011  *       documentation and/or other materials provided with the distribution.
00012  *     * Neither the name of the copyright holder nor the
00013  *       names of its contributors may be used to endorse or promote products
00014  *       derived from this software without specific prior written permission.
00015  *
00016  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
00017  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00018  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00019  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
00020  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00021  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00022  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00023  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00024  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00025  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00026  */
00027 
00033 #ifndef __CPU_KERNEL_H__
00034 #define __CPU_KERNEL_H__
00035 
00036 #include "../deviceinterface.h"
00037 #include <core/config.h>
00038 
00039 #include <llvm/ExecutionEngine/GenericValue.h>
00040 #include <vector>
00041 #include <string>
00042 
00043 #include <ucontext.h>
00044 #include <pthread.h>
00045 #include <stdint.h>
00046 
00047 namespace llvm
00048 {
00049     class Function;
00050 }
00051 
00052 namespace Coal
00053 {
00054 
00055 class CPUDevice;
00056 class Kernel;
00057 class KernelEvent;
00058 class Image2D;
00059 class Image3D;
00060 
00071 class CPUKernel : public DeviceKernel
00072 {
00073     public:
00081         CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function);
00082         ~CPUKernel();
00083 
00084         size_t workGroupSize() const;
00085         cl_ulong localMemSize() const;
00086         cl_ulong privateMemSize() const;
00087         size_t preferredWorkGroupSizeMultiple() const;
00088         size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim,
00089                                   size_t global_work_size) const;
00090 
00091         Kernel *kernel() const;     
00092         CPUDevice *device() const;  
00094         llvm::Function *function() const;   
00095         llvm::Function *callFunction();     
00130         static size_t typeOffset(size_t &offset, size_t type_len);
00131 
00132     private:
00133         CPUDevice *p_device;
00134         Kernel *p_kernel;
00135         llvm::Function *p_function, *p_call_function;
00136         pthread_mutex_t p_call_function_mutex;
00137 };
00138 
00139 class CPUKernelEvent;
00140 
00150 class CPUKernelWorkGroup
00151 {
00152     public:
00160         CPUKernelWorkGroup(CPUKernel *kernel, KernelEvent *event,
00161                            CPUKernelEvent *cpu_event,
00162                            const size_t *work_group_index);
00163         ~CPUKernelWorkGroup();
00164 
00180         void *callArgs(std::vector<void *> &locals_to_free);
00181 
00193         bool run();
00194 
00199         size_t getGlobalId(cl_uint dimindx) const;
00200         cl_uint getWorkDim() const;
00201         size_t getGlobalSize(cl_uint dimindx) const;
00202         size_t getLocalSize(cl_uint dimindx) const;
00203         size_t getLocalID(cl_uint dimindx) const;
00204         size_t getNumGroups(cl_uint dimindx) const;
00205         size_t getGroupID(cl_uint dimindx) const;
00206         size_t getGlobalOffset(cl_uint dimindx) const;
00207 
00208         void barrier(unsigned int flags);
00209 
00210         void *getImageData(Image2D *image, int x, int y, int z) const;
00211 
00212         void writeImage(Image2D *image, int x, int y, int z, float *color) const;
00213         void writeImage(Image2D *image, int x, int y, int z, int32_t *color) const;
00214         void writeImage(Image2D *image, int x, int y, int z, uint32_t *color) const;
00215 
00216         void readImage(float *result, Image2D *image, int x, int y, int z,
00217                        uint32_t sampler) const;
00218         void readImage(int32_t *result, Image2D *image, int x, int y, int z,
00219                        uint32_t sampler) const;
00220         void readImage(uint32_t *result, Image2D *image, int x, int y, int z,
00221                        uint32_t sampler) const;
00222 
00223         void readImage(float *result, Image2D *image, float x, float y, float z,
00224                        uint32_t sampler) const;
00225         void readImage(int32_t *result, Image2D *image, float x, float y, float z,
00226                        uint32_t sampler) const;
00227         void readImage(uint32_t *result, Image2D *image, float x, float y, float z,
00228                        uint32_t sampler) const;
00236         void builtinNotFound(const std::string &name) const;
00237 
00238     private:
00239         template<typename T>
00240         void writeImageImpl(Image2D *image, int x, int y, int z, T *color) const;
00241         template<typename T>
00242         void readImageImplI(T *result, Image2D *image, int x, int y, int z,
00243                             uint32_t sampler) const;
00244         template<typename T>
00245         void readImageImplF(T *result, Image2D *image, float x, float y, float z,
00246                             uint32_t sampler) const;
00247         template<typename T>
00248         void linear3D(T *result, float a, float b, float c,
00249                        int i0, int j0, int k0, int i1, int j1, int k1,
00250                        Image3D *image) const;
00251         template<typename T>
00252         void linear2D(T *result, float a, float b, float c, int i0, int j0,
00253                       int i1, int j1, Image2D *image) const;
00254 
00255     private:
00256         CPUKernel *p_kernel;
00257         CPUKernelEvent *p_cpu_event;
00258         KernelEvent *p_event;
00259         cl_uint p_work_dim;
00260         size_t p_index[MAX_WORK_DIMS],
00261                p_max_local_id[MAX_WORK_DIMS],
00262                p_global_id_start_offset[MAX_WORK_DIMS];
00263 
00264         void (*p_kernel_func_addr)(void *);
00265         void *p_args;
00266 
00267         // Machinery to have barrier() working
00268         struct Context
00269         {
00270             size_t local_id[MAX_WORK_DIMS];
00271             ucontext_t context;
00272             unsigned int initialized;
00273         };
00274 
00275         Context *getContextAddr(unsigned int index);
00276 
00277         Context *p_current_context;
00278         Context p_dummy_context;
00279         void *p_contexts;
00280         size_t p_stack_size;
00281         unsigned int p_num_work_items, p_current_work_item;
00282         bool p_had_barrier;
00283 };
00284 
00292 class CPUKernelEvent
00293 {
00294     public:
00301         CPUKernelEvent(CPUDevice *device, KernelEvent *event);
00302         ~CPUKernelEvent();
00303 
00304         bool reserve();  
00305         bool finished(); 
00306         CPUKernelWorkGroup *takeInstance(); 
00308         void *kernelArgs() const;           
00309         void cacheKernelArgs(void *args);   
00311         void workGroupFinished();           
00313     private:
00314         CPUDevice *p_device;
00315         KernelEvent *p_event;
00316         size_t p_current_work_group[MAX_WORK_DIMS],
00317                p_max_work_groups[MAX_WORK_DIMS];
00318         size_t p_current_wg, p_finished_wg, p_num_wg;
00319         pthread_mutex_t p_mutex;
00320         void *p_kernel_args;
00321 };
00322 
00323 }
00324 
00325 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines