// Clover Git
// OpenCL 1.1 software implementation
00001 /* 00002 * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr> 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions are met: 00007 * * Redistributions of source code must retain the above copyright 00008 * notice, this list of conditions and the following disclaimer. 00009 * * Redistributions in binary form must reproduce the above copyright 00010 * notice, this list of conditions and the following disclaimer in the 00011 * documentation and/or other materials provided with the distribution. 00012 * * Neither the name of the copyright holder nor the 00013 * names of its contributors may be used to endorse or promote products 00014 * derived from this software without specific prior written permission. 00015 * 00016 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 00017 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00018 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00019 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 00020 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00021 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00022 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00023 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00024 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00025 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00026 */ 00027 00033 #ifndef __CPU_KERNEL_H__ 00034 #define __CPU_KERNEL_H__ 00035 00036 #include "../deviceinterface.h" 00037 #include <core/config.h> 00038 00039 #include <llvm/ExecutionEngine/GenericValue.h> 00040 #include <vector> 00041 #include <string> 00042 00043 #include <ucontext.h> 00044 #include <pthread.h> 00045 #include <stdint.h> 00046 00047 namespace llvm 00048 { 00049 class Function; 00050 } 00051 00052 namespace Coal 00053 { 00054 00055 class CPUDevice; 00056 class Kernel; 00057 class KernelEvent; 00058 class Image2D; 00059 class Image3D; 00060 00071 class CPUKernel : public DeviceKernel 00072 { 00073 public: 00081 CPUKernel(CPUDevice *device, Kernel *kernel, llvm::Function *function); 00082 ~CPUKernel(); 00083 00084 size_t workGroupSize() const; 00085 cl_ulong localMemSize() const; 00086 cl_ulong privateMemSize() const; 00087 size_t preferredWorkGroupSizeMultiple() const; 00088 size_t guessWorkGroupSize(cl_uint num_dims, cl_uint dim, 00089 size_t global_work_size) const; 00090 00091 Kernel *kernel() const; 00092 CPUDevice *device() const; 00094 llvm::Function *function() const; 00095 llvm::Function *callFunction(); 00130 static size_t typeOffset(size_t &offset, size_t type_len); 00131 00132 private: 00133 CPUDevice *p_device; 00134 Kernel *p_kernel; 00135 llvm::Function *p_function, *p_call_function; 00136 pthread_mutex_t p_call_function_mutex; 00137 }; 00138 00139 class CPUKernelEvent; 00140 00150 class CPUKernelWorkGroup 00151 { 00152 public: 00160 CPUKernelWorkGroup(CPUKernel *kernel, KernelEvent *event, 00161 CPUKernelEvent *cpu_event, 00162 const size_t *work_group_index); 00163 ~CPUKernelWorkGroup(); 00164 00180 void *callArgs(std::vector<void *> &locals_to_free); 00181 00193 bool run(); 00194 00199 size_t getGlobalId(cl_uint dimindx) const; 00200 cl_uint getWorkDim() const; 00201 size_t getGlobalSize(cl_uint dimindx) const; 00202 size_t getLocalSize(cl_uint dimindx) const; 00203 size_t getLocalID(cl_uint dimindx) const; 00204 size_t 
getNumGroups(cl_uint dimindx) const; 00205 size_t getGroupID(cl_uint dimindx) const; 00206 size_t getGlobalOffset(cl_uint dimindx) const; 00207 00208 void barrier(unsigned int flags); 00209 00210 void *getImageData(Image2D *image, int x, int y, int z) const; 00211 00212 void writeImage(Image2D *image, int x, int y, int z, float *color) const; 00213 void writeImage(Image2D *image, int x, int y, int z, int32_t *color) const; 00214 void writeImage(Image2D *image, int x, int y, int z, uint32_t *color) const; 00215 00216 void readImage(float *result, Image2D *image, int x, int y, int z, 00217 uint32_t sampler) const; 00218 void readImage(int32_t *result, Image2D *image, int x, int y, int z, 00219 uint32_t sampler) const; 00220 void readImage(uint32_t *result, Image2D *image, int x, int y, int z, 00221 uint32_t sampler) const; 00222 00223 void readImage(float *result, Image2D *image, float x, float y, float z, 00224 uint32_t sampler) const; 00225 void readImage(int32_t *result, Image2D *image, float x, float y, float z, 00226 uint32_t sampler) const; 00227 void readImage(uint32_t *result, Image2D *image, float x, float y, float z, 00228 uint32_t sampler) const; 00236 void builtinNotFound(const std::string &name) const; 00237 00238 private: 00239 template<typename T> 00240 void writeImageImpl(Image2D *image, int x, int y, int z, T *color) const; 00241 template<typename T> 00242 void readImageImplI(T *result, Image2D *image, int x, int y, int z, 00243 uint32_t sampler) const; 00244 template<typename T> 00245 void readImageImplF(T *result, Image2D *image, float x, float y, float z, 00246 uint32_t sampler) const; 00247 template<typename T> 00248 void linear3D(T *result, float a, float b, float c, 00249 int i0, int j0, int k0, int i1, int j1, int k1, 00250 Image3D *image) const; 00251 template<typename T> 00252 void linear2D(T *result, float a, float b, float c, int i0, int j0, 00253 int i1, int j1, Image2D *image) const; 00254 00255 private: 00256 CPUKernel *p_kernel; 00257 
CPUKernelEvent *p_cpu_event; 00258 KernelEvent *p_event; 00259 cl_uint p_work_dim; 00260 size_t p_index[MAX_WORK_DIMS], 00261 p_max_local_id[MAX_WORK_DIMS], 00262 p_global_id_start_offset[MAX_WORK_DIMS]; 00263 00264 void (*p_kernel_func_addr)(void *); 00265 void *p_args; 00266 00267 // Machinery to have barrier() working 00268 struct Context 00269 { 00270 size_t local_id[MAX_WORK_DIMS]; 00271 ucontext_t context; 00272 unsigned int initialized; 00273 }; 00274 00275 Context *getContextAddr(unsigned int index); 00276 00277 Context *p_current_context; 00278 Context p_dummy_context; 00279 void *p_contexts; 00280 size_t p_stack_size; 00281 unsigned int p_num_work_items, p_current_work_item; 00282 bool p_had_barrier; 00283 }; 00284 00292 class CPUKernelEvent 00293 { 00294 public: 00301 CPUKernelEvent(CPUDevice *device, KernelEvent *event); 00302 ~CPUKernelEvent(); 00303 00304 bool reserve(); 00305 bool finished(); 00306 CPUKernelWorkGroup *takeInstance(); 00308 void *kernelArgs() const; 00309 void cacheKernelArgs(void *args); 00311 void workGroupFinished(); 00313 private: 00314 CPUDevice *p_device; 00315 KernelEvent *p_event; 00316 size_t p_current_work_group[MAX_WORK_DIMS], 00317 p_max_work_groups[MAX_WORK_DIMS]; 00318 size_t p_current_wg, p_finished_wg, p_num_wg; 00319 pthread_mutex_t p_mutex; 00320 void *p_kernel_args; 00321 }; 00322 00323 } 00324 00325 #endif