1c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 2c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// Copyright 2012 Francisco Jerez 3c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 4c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// Permission is hereby granted, free of charge, to any person obtaining a 5c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// copy of this software and associated documentation files (the "Software"), 6c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// to deal in the Software without restriction, including without limitation 7c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// the rights to use, copy, modify, merge, publish, distribute, sublicense, 8c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// and/or sell copies of the Software, and to permit persons to whom the 9c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// Software is furnished to do so, subject to the following conditions: 10c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 11c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// The above copyright notice and this permission notice shall be included in 12c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// all copies or substantial portions of the Software. 13c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 14c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// SOFTWARE. 21c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 22c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 23c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "core/kernel.hpp" 24c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "core/resource.hpp" 25c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "pipe/p_context.h" 26c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 27c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezusing namespace clover; 28c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 29c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::_cl_kernel(clover::program &prog, 30c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez const std::string &name, 31c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez const std::vector<clover::module::argument> &args) : 32c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez prog(prog), __name(name), exec(*this) { 33c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez for (auto arg : args) { 34c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (arg.type == module::argument::scalar) 35c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez this->args.emplace_back(new scalar_argument(arg.size)); 36c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez else if (arg.type == module::argument::global) 37c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez this->args.emplace_back(new global_argument(arg.size)); 38c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez else if (arg.type == module::argument::local) 39c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez this->args.emplace_back(new local_argument()); 40c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez else if (arg.type == module::argument::constant) 41c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez this->args.emplace_back(new constant_argument()); 42c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez else if (arg.type == module::argument::image2d_rd || 43c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez arg.type == module::argument::image3d_rd) 44c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez this->args.emplace_back(new image_rd_argument()); 45c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez else if (arg.type == module::argument::image2d_wr || 46c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez arg.type == module::argument::image3d_wr) 47c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez this->args.emplace_back(new image_wr_argument()); 48c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez else if (arg.type == module::argument::sampler) 49c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez this->args.emplace_back(new sampler_argument()); 50c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez else 51c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_KERNEL_DEFINITION); 52c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez } 53c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 54c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 55c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jereztemplate<typename T, typename V> 56c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezstatic inline std::vector<T> 57c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezpad_vector(clover::command_queue &q, const V &v, T x) { 58c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez std::vector<T> w { v.begin(), v.end() }; 59c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez w.resize(q.dev.max_block_size().size(), x); 60c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return w; 61c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 62c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 63c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 64c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::launch(clover::command_queue &q, 65c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez const std::vector<size_t> &grid_offset, 66c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez const std::vector<size_t> &grid_size, 67c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez const std::vector<size_t> &block_size) { 68c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez void *st = exec.bind(&q); 69c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; }, 70c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.g_handles.begin(), exec.g_handles.end()); 71c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 72c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->bind_compute_state(q.pipe, st); 73c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), 74c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.samplers.data()); 75c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), 76c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.sviews.data()); 77c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), 78c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.resources.data()); 79c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), 80c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.g_buffers.data(), g_handles.data()); 81c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 82c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->launch_grid(q.pipe, 83c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez pad_vector<uint>(q, block_size, 1).data(), 84c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez pad_vector<uint>(q, grid_size, 1).data(), 85c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez module(q).sym(__name).offset, 86c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.input.data()); 87c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 88c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); 89c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); 90c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL); 91c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL); 92c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.unbind(); 93c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 94c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 95c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 96c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::mem_local() const { 97c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t sz = 0; 98c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 99c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez for (auto &arg : args) { 100c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (dynamic_cast<local_argument *>(arg.get())) 101c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez sz += arg->storage(); 102c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez } 103c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 104c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return sz; 105c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 106c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 107c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 108c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::mem_private() const { 109c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return 0; 110c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 111c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 112c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 113c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::max_block_size() const { 114c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return SIZE_MAX; 115c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 116c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 117c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezconst std::string & 118c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::name() const { 119c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return __name; 120c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 121c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 122c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezstd::vector<size_t> 123c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::block_size() const { 124c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return { 0, 0, 0 }; 125c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 126c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 127c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezconst clover::module & 128c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::module(const clover::command_queue &q) const { 129c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return prog.binaries().find(&q.dev)->second; 130c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 131c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 132c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 133c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::exec_context::exec_context(clover::kernel &kern) : 134c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez kern(kern), q(NULL), mem_local(0), st(NULL) { 135c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 136c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 137c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::exec_context::~exec_context() { 138c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (st) 139c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q->pipe->delete_compute_state(q->pipe, st); 140c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 141c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 142c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid * 143c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::exec_context::bind(clover::command_queue *__q) { 144c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez std::swap(q, __q); 145c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 146c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez for (auto &arg : kern.args) 147c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez arg->bind(*this); 148c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 149c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez // Create a new compute state if anything changed. 150c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (!st || q != __q || 151c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_local_mem != mem_local || 152c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_input_mem != input.size()) { 153c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (st) 154c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __q->pipe->delete_compute_state(__q->pipe, st); 155c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 156c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.prog = kern.module(*q).sec(module::section::text).data.begin(); 157c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_local_mem = mem_local; 158c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_input_mem = input.size(); 159c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez st = q->pipe->create_compute_state(q->pipe, &cs); 160c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez } 161c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 162c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return st; 163c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 164c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 165c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 166c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::exec_context::unbind() { 167c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez for (auto &arg : kern.args) 168c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez arg->unbind(*this); 169c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 170c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez input.clear(); 171c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez samplers.clear(); 172c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez sviews.clear(); 173c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez resources.clear(); 174c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez g_buffers.clear(); 175c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez g_handles.clear(); 176c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez mem_local = 0; 177c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 178c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 179c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::argument::argument(size_t size) : 180c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __size(size), __set(false) { 181c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 182c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 183c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezbool 184c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::argument::set() const { 185c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return __set; 186c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 187c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 188c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 189c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::argument::storage() const { 190c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return 0; 191c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 192c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 193c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::scalar_argument::scalar_argument(size_t size) : 194c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez argument(size) { 195c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 196c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 197c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 198c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::scalar_argument::set(size_t size, const void *value) { 199c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != __size) 200c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 201c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 202c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez v = { (uint8_t *)value, (uint8_t *)value + size }; 203c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __set = true; 204c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 205c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 206c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 207c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::scalar_argument::bind(exec_context &ctx) { 208c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.input.insert(ctx.input.end(), v.begin(), v.end()); 209c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 210c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 211c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 212c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::scalar_argument::unbind(exec_context &ctx) { 213c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 214c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 215c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::global_argument::global_argument(size_t size) : 216c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez argument(size) { 217c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 218c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 219c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 220c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::global_argument::set(size_t size, const void *value) { 221c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 222c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 223c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 224c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value); 225c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __set = true; 226c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 227c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 228c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 229c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::global_argument::bind(exec_context &ctx) { 230c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t offset = ctx.input.size(); 231c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t idx = ctx.g_buffers.size(); 232c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 233c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.input.resize(offset + __size); 234c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 235c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.g_buffers.resize(idx + 1); 236c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.g_buffers[idx] = obj->resource(ctx.q).pipe; 237c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 238c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.g_handles.resize(idx + 1); 239c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.g_handles[idx] = offset; 240c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 241c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 242c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 243c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::global_argument::unbind(exec_context &ctx) { 244c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 245c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 246c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::local_argument::local_argument() : 247c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez argument(sizeof(uint32_t)) { 248c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 249c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 250c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 251c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::local_argument::storage() const { 252c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return __storage; 253c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 254c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 255c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 256c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::local_argument::set(size_t size, const void *value) { 257c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (value) 258c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_VALUE); 259c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 260c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __storage = size; 261c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __set = true; 262c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 263c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 264c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 265c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::local_argument::bind(exec_context &ctx) { 266c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t offset = ctx.input.size(); 267c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t ptr = ctx.mem_local; 268c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 269c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.input.resize(offset + sizeof(uint32_t)); 270c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez *(uint32_t *)&ctx.input[offset] = ptr; 271c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 272c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.mem_local += __storage; 273c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 274c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 275c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 276c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::local_argument::unbind(exec_context &ctx) { 277c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 278c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 279c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::constant_argument::constant_argument() : 280c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez argument(sizeof(uint32_t)) { 281c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 282c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 283c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 284c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::constant_argument::set(size_t size, const void *value) { 285c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 286c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 287c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 288c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value); 289c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __set = true; 290c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 291c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 292c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 293c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::constant_argument::bind(exec_context &ctx) { 294c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t offset = ctx.input.size(); 295c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t idx = ctx.resources.size(); 296c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 297c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.input.resize(offset + sizeof(uint32_t)); 298c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez *(uint32_t *)&ctx.input[offset] = idx << 24; 299c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 300c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.resources.resize(idx + 1); 301c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false); 302c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 303c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 304c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 305c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::constant_argument::unbind(exec_context &ctx) { 306c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj->resource(ctx.q).unbind_surface(*ctx.q, st); 307c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 308c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 309c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_rd_argument::image_rd_argument() : 310c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez argument(sizeof(uint32_t)) { 311c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 312c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 313c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 314c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_rd_argument::set(size_t size, const void *value) { 315c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 316c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 317c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 318c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj = dynamic_cast<clover::image *>(*(cl_mem *)value); 319c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __set = true; 320c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 321c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 322c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 323c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_rd_argument::bind(exec_context &ctx) { 324c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t offset = ctx.input.size(); 325c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t idx = ctx.sviews.size(); 326c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 327c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.input.resize(offset + sizeof(uint32_t)); 328c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez *(uint32_t *)&ctx.input[offset] = idx; 329c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 330c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.sviews.resize(idx + 1); 331c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q); 332c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 333c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 334c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 335c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_rd_argument::unbind(exec_context &ctx) { 336c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st); 337c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 338c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 339c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_wr_argument::image_wr_argument() : 340c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez argument(sizeof(uint32_t)) { 341c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 342c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 343c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 344c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_wr_argument::set(size_t size, const void *value) { 345c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 346c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 347c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 348c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj = dynamic_cast<clover::image *>(*(cl_mem *)value); 349c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __set = true; 350c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 351c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 352c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 353c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_wr_argument::bind(exec_context &ctx) { 354c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t offset = ctx.input.size(); 355c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t idx = ctx.resources.size(); 356c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 357c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.input.resize(offset + sizeof(uint32_t)); 358c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez *(uint32_t *)&ctx.input[offset] = idx; 359c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 360c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.resources.resize(idx + 1); 361c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true); 362c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 363c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 364c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 365c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::image_wr_argument::unbind(exec_context &ctx) { 366c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj->resource(ctx.q).unbind_surface(*ctx.q, st); 367c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 368c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 369c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::sampler_argument::sampler_argument() : 370c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez argument(0) { 371c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 372c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 373c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 374c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::sampler_argument::set(size_t size, const void *value) { 375c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_sampler)) 376c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 377c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 378c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj = *(cl_sampler *)value; 379c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez __set = true; 380c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 381c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 382c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 383c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::sampler_argument::bind(exec_context &ctx) { 384c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t idx = ctx.samplers.size(); 385c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 386c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.samplers.resize(idx + 1); 387c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez ctx.samplers[idx] = st = obj->bind(*ctx.q); 388c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 389c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 390c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 391c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez_cl_kernel::sampler_argument::unbind(exec_context &ctx) { 392c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez obj->unbind(*ctx.q, st); 393c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 394