1// 2// Copyright 2012 Francisco Jerez 3// 4// Permission is hereby granted, free of charge, to any person obtaining a 5// copy of this software and associated documentation files (the "Software"), 6// to deal in the Software without restriction, including without limitation 7// the rights to use, copy, modify, merge, publish, distribute, sublicense, 8// and/or sell copies of the Software, and to permit persons to whom the 9// Software is furnished to do so, subject to the following conditions: 10// 11// The above copyright notice and this permission notice shall be included in 12// all copies or substantial portions of the Software. 13// 14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20// SOFTWARE. 21// 22 23#include "core/kernel.hpp" 24#include "core/resource.hpp" 25#include "pipe/p_context.h" 26 27using namespace clover; 28 29_cl_kernel::_cl_kernel(clover::program &prog, 30 const std::string &name, 31 const std::vector<clover::module::argument> &args) : 32 prog(prog), __name(name), exec(*this) { 33 for (auto arg : args) { 34 if (arg.type == module::argument::scalar) 35 this->args.emplace_back(new scalar_argument(arg.size)); 36 else if (arg.type == module::argument::global) 37 this->args.emplace_back(new global_argument(arg.size)); 38 else if (arg.type == module::argument::local) 39 this->args.emplace_back(new local_argument()); 40 else if (arg.type == module::argument::constant) 41 this->args.emplace_back(new constant_argument()); 42 else if (arg.type == module::argument::image2d_rd || 43 arg.type == module::argument::image3d_rd) 44 this->args.emplace_back(new image_rd_argument()); 45 else if (arg.type == module::argument::image2d_wr || 46 arg.type == module::argument::image3d_wr) 47 this->args.emplace_back(new image_wr_argument()); 48 else if (arg.type == module::argument::sampler) 49 this->args.emplace_back(new sampler_argument()); 50 else 51 throw error(CL_INVALID_KERNEL_DEFINITION); 52 } 53} 54 55template<typename T, typename V> 56static inline std::vector<T> 57pad_vector(clover::command_queue &q, const V &v, T x) { 58 std::vector<T> w { v.begin(), v.end() }; 59 w.resize(q.dev.max_block_size().size(), x); 60 return w; 61} 62 63void 64_cl_kernel::launch(clover::command_queue &q, 65 const std::vector<size_t> &grid_offset, 66 const std::vector<size_t> &grid_size, 67 const std::vector<size_t> &block_size) { 68 void *st = exec.bind(&q); 69 auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; }, 70 exec.g_handles.begin(), exec.g_handles.end()); 71 72 q.pipe->bind_compute_state(q.pipe, st); 73 q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), 74 exec.samplers.data()); 75 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), 76 exec.sviews.data()); 77 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), 78 exec.resources.data()); 79 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), 80 exec.g_buffers.data(), g_handles.data()); 81 82 q.pipe->launch_grid(q.pipe, 83 pad_vector<uint>(q, block_size, 1).data(), 84 pad_vector<uint>(q, grid_size, 1).data(), 85 module(q).sym(__name).offset, 86 exec.input.data()); 87 88 q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); 89 q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); 90 q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL); 91 q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL); 92 exec.unbind(); 93} 94 95size_t 96_cl_kernel::mem_local() const { 97 size_t sz = 0; 98 99 for (auto &arg : args) { 100 if (dynamic_cast<local_argument *>(arg.get())) 101 sz += arg->storage(); 102 } 103 104 return sz; 105} 106 107size_t 108_cl_kernel::mem_private() const { 109 return 0; 110} 111 112size_t 113_cl_kernel::max_block_size() const { 114 return SIZE_MAX; 115} 116 117const std::string & 118_cl_kernel::name() const { 119 return __name; 120} 121 122std::vector<size_t> 123_cl_kernel::block_size() const { 124 return { 0, 0, 0 }; 125} 126 127const clover::module & 128_cl_kernel::module(const clover::command_queue &q) const { 129 return prog.binaries().find(&q.dev)->second; 130} 131 132 133_cl_kernel::exec_context::exec_context(clover::kernel &kern) : 134 kern(kern), q(NULL), mem_local(0), st(NULL) { 135} 136 137_cl_kernel::exec_context::~exec_context() { 138 if (st) 139 q->pipe->delete_compute_state(q->pipe, st); 140} 141 142void * 143_cl_kernel::exec_context::bind(clover::command_queue *__q) { 144 std::swap(q, __q); 145 146 for (auto &arg : kern.args) 147 arg->bind(*this); 148 149 // Create a new compute state if anything changed. 150 if (!st || q != __q || 151 cs.req_local_mem != mem_local || 152 cs.req_input_mem != input.size()) { 153 if (st) 154 __q->pipe->delete_compute_state(__q->pipe, st); 155 156 cs.prog = kern.module(*q).sec(module::section::text).data.begin(); 157 cs.req_local_mem = mem_local; 158 cs.req_input_mem = input.size(); 159 st = q->pipe->create_compute_state(q->pipe, &cs); 160 } 161 162 return st; 163} 164 165void 166_cl_kernel::exec_context::unbind() { 167 for (auto &arg : kern.args) 168 arg->unbind(*this); 169 170 input.clear(); 171 samplers.clear(); 172 sviews.clear(); 173 resources.clear(); 174 g_buffers.clear(); 175 g_handles.clear(); 176 mem_local = 0; 177} 178 179_cl_kernel::argument::argument(size_t size) : 180 __size(size), __set(false) { 181} 182 183bool 184_cl_kernel::argument::set() const { 185 return __set; 186} 187 188size_t 189_cl_kernel::argument::storage() const { 190 return 0; 191} 192 193_cl_kernel::scalar_argument::scalar_argument(size_t size) : 194 argument(size) { 195} 196 197void 198_cl_kernel::scalar_argument::set(size_t size, const void *value) { 199 if (size != __size) 200 throw error(CL_INVALID_ARG_SIZE); 201 202 v = { (uint8_t *)value, (uint8_t *)value + size }; 203 __set = true; 204} 205 206void 207_cl_kernel::scalar_argument::bind(exec_context &ctx) { 208 ctx.input.insert(ctx.input.end(), v.begin(), v.end()); 209} 210 211void 212_cl_kernel::scalar_argument::unbind(exec_context &ctx) { 213} 214 215_cl_kernel::global_argument::global_argument(size_t size) : 216 argument(size) { 217} 218 219void 220_cl_kernel::global_argument::set(size_t size, const void *value) { 221 if (size != sizeof(cl_mem)) 222 throw error(CL_INVALID_ARG_SIZE); 223 224 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value); 225 __set = true; 226} 227 228void 229_cl_kernel::global_argument::bind(exec_context &ctx) { 230 size_t offset = ctx.input.size(); 231 size_t idx = ctx.g_buffers.size(); 232 233 ctx.input.resize(offset + __size); 234 235 ctx.g_buffers.resize(idx + 1); 236 ctx.g_buffers[idx] = obj->resource(ctx.q).pipe; 237 238 ctx.g_handles.resize(idx + 1); 239 ctx.g_handles[idx] = offset; 240} 241 242void 243_cl_kernel::global_argument::unbind(exec_context &ctx) { 244} 245 246_cl_kernel::local_argument::local_argument() : 247 argument(sizeof(uint32_t)) { 248} 249 250size_t 251_cl_kernel::local_argument::storage() const { 252 return __storage; 253} 254 255void 256_cl_kernel::local_argument::set(size_t size, const void *value) { 257 if (value) 258 throw error(CL_INVALID_ARG_VALUE); 259 260 __storage = size; 261 __set = true; 262} 263 264void 265_cl_kernel::local_argument::bind(exec_context &ctx) { 266 size_t offset = ctx.input.size(); 267 size_t ptr = ctx.mem_local; 268 269 ctx.input.resize(offset + sizeof(uint32_t)); 270 *(uint32_t *)&ctx.input[offset] = ptr; 271 272 ctx.mem_local += __storage; 273} 274 275void 276_cl_kernel::local_argument::unbind(exec_context &ctx) { 277} 278 279_cl_kernel::constant_argument::constant_argument() : 280 argument(sizeof(uint32_t)) { 281} 282 283void 284_cl_kernel::constant_argument::set(size_t size, const void *value) { 285 if (size != sizeof(cl_mem)) 286 throw error(CL_INVALID_ARG_SIZE); 287 288 obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value); 289 __set = true; 290} 291 292void 293_cl_kernel::constant_argument::bind(exec_context &ctx) { 294 size_t offset = ctx.input.size(); 295 size_t idx = ctx.resources.size(); 296 297 ctx.input.resize(offset + sizeof(uint32_t)); 298 *(uint32_t *)&ctx.input[offset] = idx << 24; 299 300 ctx.resources.resize(idx + 1); 301 ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false); 302} 303 304void 305_cl_kernel::constant_argument::unbind(exec_context &ctx) { 306 obj->resource(ctx.q).unbind_surface(*ctx.q, st); 307} 308 309_cl_kernel::image_rd_argument::image_rd_argument() : 310 argument(sizeof(uint32_t)) { 311} 312 313void 314_cl_kernel::image_rd_argument::set(size_t size, const void *value) { 315 if (size != sizeof(cl_mem)) 316 throw error(CL_INVALID_ARG_SIZE); 317 318 obj = dynamic_cast<clover::image *>(*(cl_mem *)value); 319 __set = true; 320} 321 322void 323_cl_kernel::image_rd_argument::bind(exec_context &ctx) { 324 size_t offset = ctx.input.size(); 325 size_t idx = ctx.sviews.size(); 326 327 ctx.input.resize(offset + sizeof(uint32_t)); 328 *(uint32_t *)&ctx.input[offset] = idx; 329 330 ctx.sviews.resize(idx + 1); 331 ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q); 332} 333 334void 335_cl_kernel::image_rd_argument::unbind(exec_context &ctx) { 336 obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st); 337} 338 339_cl_kernel::image_wr_argument::image_wr_argument() : 340 argument(sizeof(uint32_t)) { 341} 342 343void 344_cl_kernel::image_wr_argument::set(size_t size, const void *value) { 345 if (size != sizeof(cl_mem)) 346 throw error(CL_INVALID_ARG_SIZE); 347 348 obj = dynamic_cast<clover::image *>(*(cl_mem *)value); 349 __set = true; 350} 351 352void 353_cl_kernel::image_wr_argument::bind(exec_context &ctx) { 354 size_t offset = ctx.input.size(); 355 size_t idx = ctx.resources.size(); 356 357 ctx.input.resize(offset + sizeof(uint32_t)); 358 *(uint32_t *)&ctx.input[offset] = idx; 359 360 ctx.resources.resize(idx + 1); 361 ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true); 362} 363 364void 365_cl_kernel::image_wr_argument::unbind(exec_context &ctx) { 366 obj->resource(ctx.q).unbind_surface(*ctx.q, st); 367} 368 369_cl_kernel::sampler_argument::sampler_argument() : 370 argument(0) { 371} 372 373void 374_cl_kernel::sampler_argument::set(size_t size, const void *value) { 375 if (size != sizeof(cl_sampler)) 376 throw error(CL_INVALID_ARG_SIZE); 377 378 obj = *(cl_sampler *)value; 379 __set = true; 380} 381 382void 383_cl_kernel::sampler_argument::bind(exec_context &ctx) { 384 size_t idx = ctx.samplers.size(); 385 386 ctx.samplers.resize(idx + 1); 387 ctx.samplers[idx] = st = obj->bind(*ctx.q); 388} 389 390void 391_cl_kernel::sampler_argument::unbind(exec_context &ctx) { 392 obj->unbind(*ctx.q, st); 393} 394