// Clover Git - OpenCL 1.1 software implementation
00001 /* 00002 * Copyright (c) 2011, Denis Steckelmacher <steckdenis@yahoo.fr> 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions are met: 00007 * * Redistributions of source code must retain the above copyright 00008 * notice, this list of conditions and the following disclaimer. 00009 * * Redistributions in binary form must reproduce the above copyright 00010 * notice, this list of conditions and the following disclaimer in the 00011 * documentation and/or other materials provided with the distribution. 00012 * * Neither the name of the copyright holder nor the 00013 * names of its contributors may be used to endorse or promote products 00014 * derived from this software without specific prior written permission. 00015 * 00016 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 00017 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00018 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00019 * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY 00020 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00021 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00022 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00023 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00024 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00025 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00026 */ 00027 00034 #include "worker.h" 00035 #include "device.h" 00036 #include "buffer.h" 00037 #include "kernel.h" 00038 #include "builtins.h" 00039 00040 #include "../commandqueue.h" 00041 #include "../events.h" 00042 #include "../memobject.h" 00043 #include "../kernel.h" 00044 00045 #include <sys/mman.h> 00046 00047 #include <cstring> 00048 #include <iostream> 00049 00050 using namespace Coal; 00051 00052 void *worker(void *data) 00053 { 00054 CPUDevice *device = (CPUDevice *)data; 00055 bool stop = false; 00056 cl_int errcode; 00057 Event *event; 00058 00059 // Initialize TLS 00060 setWorkItemsData(0, 0); 00061 00062 while (true) 00063 { 00064 event = device->getEvent(stop); 00065 00066 // Ensure we have a good event and we don't have to stop 00067 if (stop) break; 00068 if (!event) continue; 00069 00070 // Get info about the event and its command queue 00071 Event::Type t = event->type(); 00072 CommandQueue *queue = 0; 00073 cl_command_queue_properties queue_props = 0; 00074 00075 errcode = CL_SUCCESS; 00076 00077 event->info(CL_EVENT_COMMAND_QUEUE, sizeof(CommandQueue *), &queue, 0); 00078 00079 if (queue) 00080 queue->info(CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), 00081 &queue_props, 0); 00082 00083 if (queue_props & CL_QUEUE_PROFILING_ENABLE) 00084 event->updateTiming(Event::Start); 00085 00086 // Execute the action 00087 switch (t) 00088 { 00089 case Event::ReadBuffer: 00090 case Event::WriteBuffer: 00091 { 00092 ReadWriteBufferEvent *e = (ReadWriteBufferEvent *)event; 00093 CPUBuffer *buf = (CPUBuffer *)e->buffer()->deviceBuffer(device); 00094 char *data = (char *)buf->data(); 00095 00096 data += e->offset(); 00097 00098 if (t == Event::ReadBuffer) 00099 std::memcpy(e->ptr(), data, e->cb()); 00100 else 00101 std::memcpy(data, e->ptr(), e->cb()); 00102 00103 break; 00104 } 00105 case Event::CopyBuffer: 00106 { 00107 CopyBufferEvent *e = (CopyBufferEvent *)event; 00108 CPUBuffer *src = (CPUBuffer *)e->source()->deviceBuffer(device); 00109 
CPUBuffer *dst = (CPUBuffer *)e->destination()->deviceBuffer(device); 00110 00111 std::memcpy(dst->data(), src->data(), e->cb()); 00112 00113 break; 00114 } 00115 case Event::ReadBufferRect: 00116 case Event::WriteBufferRect: 00117 case Event::CopyBufferRect: 00118 case Event::ReadImage: 00119 case Event::WriteImage: 00120 case Event::CopyImage: 00121 case Event::CopyBufferToImage: 00122 case Event::CopyImageToBuffer: 00123 { 00124 // src = buffer and dst = mem if note copy 00125 ReadWriteCopyBufferRectEvent *e = (ReadWriteCopyBufferRectEvent *)event; 00126 CPUBuffer *src_buf = (CPUBuffer *)e->source()->deviceBuffer(device); 00127 00128 unsigned char *src = (unsigned char *)src_buf->data(); 00129 unsigned char *dst; 00130 00131 switch (t) 00132 { 00133 case Event::CopyBufferRect: 00134 case Event::CopyImage: 00135 case Event::CopyImageToBuffer: 00136 case Event::CopyBufferToImage: 00137 { 00138 CopyBufferRectEvent *cbre = (CopyBufferRectEvent *)e; 00139 CPUBuffer *dst_buf = 00140 (CPUBuffer *)cbre->destination()->deviceBuffer(device); 00141 00142 dst = (unsigned char *)dst_buf->data(); 00143 break; 00144 } 00145 default: 00146 { 00147 // dst = host memory location 00148 ReadWriteBufferRectEvent *rwbre = (ReadWriteBufferRectEvent *)e; 00149 00150 dst = (unsigned char *)rwbre->ptr(); 00151 } 00152 } 00153 00154 // Iterate over the lines to copy and use memcpy 00155 for (size_t z=0; z<e->region(2); ++z) 00156 { 00157 for (size_t y=0; y<e->region(1); ++y) 00158 { 00159 unsigned char *s; 00160 unsigned char *d; 00161 00162 d = imageData(dst, 00163 e->dst_origin(0), 00164 y + e->dst_origin(1), 00165 z + e->dst_origin(2), 00166 e->dst_row_pitch(), 00167 e->dst_slice_pitch(), 00168 1); 00169 00170 s = imageData(src, 00171 e->src_origin(0), 00172 y + e->src_origin(1), 00173 z + e->src_origin(2), 00174 e->src_row_pitch(), 00175 e->src_slice_pitch(), 00176 1); 00177 00178 // Copying and image to a buffer may need to add an offset 00179 // to the buffer address (its 
rectangular origin is 00180 // always (0, 0, 0)). 00181 if (t == Event::CopyBufferToImage) 00182 { 00183 CopyBufferToImageEvent *cptie = (CopyBufferToImageEvent *)e; 00184 s += cptie->offset(); 00185 } 00186 else if (t == Event::CopyImageToBuffer) 00187 { 00188 CopyImageToBufferEvent *citbe = (CopyImageToBufferEvent *)e; 00189 d += citbe->offset(); 00190 } 00191 00192 if (t == Event::WriteBufferRect || t == Event::WriteImage) 00193 std::memcpy(s, d, e->region(0)); // Write dest (memory) in src 00194 else 00195 std::memcpy(d, s, e->region(0)); // Write src (buffer) in dest (memory), or copy the buffers 00196 } 00197 } 00198 00199 break; 00200 } 00201 case Event::MapBuffer: 00202 case Event::MapImage: 00203 // All was already done in CPUBuffer::initEventDeviceData() 00204 break; 00205 00206 case Event::NativeKernel: 00207 { 00208 NativeKernelEvent *e = (NativeKernelEvent *)event; 00209 void (*func)(void *) = (void (*)(void *))e->function(); 00210 void *args = e->args(); 00211 00212 func(args); 00213 00214 break; 00215 } 00216 case Event::NDRangeKernel: 00217 case Event::TaskKernel: 00218 { 00219 KernelEvent *e = (KernelEvent *)event; 00220 CPUKernelEvent *ke = (CPUKernelEvent *)e->deviceData(); 00221 00222 // Take an instance 00223 CPUKernelWorkGroup *instance = ke->takeInstance(); 00224 ke = 0; // Unlocked, don't use anymore 00225 00226 if (!instance->run()) 00227 errcode = CL_INVALID_PROGRAM_EXECUTABLE; 00228 00229 delete instance; 00230 00231 break; 00232 } 00233 default: 00234 break; 00235 } 00236 00237 // Cleanups 00238 if (errcode == CL_SUCCESS) 00239 { 00240 bool finished = true; 00241 00242 if (event->type() == Event::NDRangeKernel || 00243 event->type() == Event::TaskKernel) 00244 { 00245 CPUKernelEvent *ke = (CPUKernelEvent *)event->deviceData(); 00246 finished = ke->finished(); 00247 } 00248 00249 if (finished) 00250 { 00251 event->setStatus(Event::Complete); 00252 00253 if (queue_props & CL_QUEUE_PROFILING_ENABLE) 00254 event->updateTiming(Event::End); 
00255 00256 // Clean the queue 00257 if (queue) 00258 queue->cleanEvents(); 00259 } 00260 } 00261 else 00262 { 00263 // The event failed 00264 event->setStatus((Event::Status)errcode); 00265 00266 if (queue_props & CL_QUEUE_PROFILING_ENABLE) 00267 event->updateTiming(Event::End); 00268 } 00269 } 00270 00271 // Free mmapped() data if needed 00272 size_t mapped_size; 00273 void *mapped_data = getWorkItemsData(mapped_size); 00274 00275 if (mapped_data) 00276 munmap(mapped_data, mapped_size); 00277 00278 return 0; 00279 }