1c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 2c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// Copyright 2012 Francisco Jerez 3c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 4c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// Permission is hereby granted, free of charge, to any person obtaining a 5c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// copy of this software and associated documentation files (the "Software"), 6c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// to deal in the Software without restriction, including without limitation 7c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// the rights to use, copy, modify, merge, publish, distribute, sublicense, 8c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// and/or sell copies of the Software, and to permit persons to whom the 9c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// Software is furnished to do so, subject to the following conditions: 10c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 11c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// The above copyright notice and this permission notice shall be included in 12c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// all copies or substantial portions of the Software. 13c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 14c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17f0cb66b69904b0a3e4083aa8874af63cf1c14321Kenneth Graunke// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18f0cb66b69904b0a3e4083aa8874af63cf1c14321Kenneth Graunke// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19f0cb66b69904b0a3e4083aa8874af63cf1c14321Kenneth Graunke// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20f0cb66b69904b0a3e4083aa8874af63cf1c14321Kenneth Graunke// OTHER DEALINGS IN THE SOFTWARE. 21c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez// 22c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 23c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "core/kernel.hpp" 24c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "core/resource.hpp" 25bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez#include "util/factor.hpp" 26df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez#include "util/u_math.h" 27c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "pipe/p_context.h" 28c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 29c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezusing namespace clover; 30c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 31c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerezkernel::kernel(clover::program &prog, const std::string &name, 3235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const std::vector<module::argument> &margs) : 33e9a4e74926ab67d1750c39b49a54df6fbcb0b593Francisco Jerez program(prog), _name(name), exec(*this), 34e9a4e74926ab67d1750c39b49a54df6fbcb0b593Francisco Jerez program_ref(prog._kernel_ref_counter) { 357a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez for (auto &marg : margs) { 36bf89a97748748592639087e8167e29c98c740d33Francisco Jerez if (marg.semantic == module::argument::general) 37bf89a97748748592639087e8167e29c98c740d33Francisco Jerez _args.emplace_back(argument::create(marg)); 38c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez } 39c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 40c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 417a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jereztemplate<typename V> 427a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezstatic inline std::vector<uint> 437a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezpad_vector(command_queue &q, const V &v, uint x) { 447a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez std::vector<uint> w { v.begin(), v.end() }; 45c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez w.resize(q.device().max_block_size().size(), x); 46c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return w; 47c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 48c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 49c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 5035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::launch(command_queue &q, 5135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const std::vector<size_t> &grid_offset, 5235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const std::vector<size_t> &grid_size, 5335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const std::vector<size_t> &block_size) { 541942490bae01d44a08f263ea2dc747d11c82acfeFrancisco Jerez const auto m = program().build(q.device()).binary; 557a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez const auto reduced_grid_size = 567a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez map(divides(), grid_size, block_size); 57bf89a97748748592639087e8167e29c98c740d33Francisco Jerez void *st = exec.bind(&q, grid_offset); 584d02e91e4938c98bcf9d4e57ab2e5463bb42e836Hans de Goede struct pipe_grid_info info = {}; 597a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez 607a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez // The handles are created during exec_context::bind(), so we need make 617a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez // sure to call exec_context::bind() before retrieving them. 627d61769e447e47022bea5e9fd415344b45a5a050Francisco Jerez std::vector<uint32_t *> g_handles = map([&](size_t h) { 637d61769e447e47022bea5e9fd415344b45a5a050Francisco Jerez return (uint32_t *)&exec.input[h]; 647d61769e447e47022bea5e9fd415344b45a5a050Francisco Jerez }, exec.g_handles); 65c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 66c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->bind_compute_state(q.pipe, st); 6793e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 6893e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul 0, exec.samplers.size(), 6993e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul exec.samplers.data()); 7093e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul 712901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, 722901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger exec.sviews.size(), exec.sviews.data()); 73c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), 7435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez exec.resources.data()); 75c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), 76c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.g_buffers.data(), g_handles.data()); 77c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 78bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset // Fill information for the launch_grid() call. 79ef8e50a841e79597ca56ae081102119329fd154cHans de Goede info.work_dim = grid_size.size(); 80a4cff1859efedac37368c1ddc55b091b6cd3eb65Serge Martin copy(pad_vector(q, block_size, 1), info.block); 81a4cff1859efedac37368c1ddc55b091b6cd3eb65Serge Martin copy(pad_vector(q, reduced_grid_size, 1), info.grid); 82a4cff1859efedac37368c1ddc55b091b6cd3eb65Serge Martin info.pc = find(name_equals(_name), m.syms).offset; 83bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset info.input = exec.input.data(); 84bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset 85bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset q.pipe->launch_grid(q.pipe, &info); 86c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 87c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL); 88c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL); 892901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0, 902901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger exec.sviews.size(), NULL); 9193e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0, 9293e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul exec.samplers.size(), NULL); 93be5899dcf9a337548d8095a00060d4451b0df222Bas Nieuwenhuizen 94be5899dcf9a337548d8095a00060d4451b0df222Bas Nieuwenhuizen q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER); 95c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez exec.unbind(); 96c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 97c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 98c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 9935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::mem_local() const { 100c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez size_t sz = 0; 101c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 1027a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez for (auto &arg : args()) { 1037a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez if (dynamic_cast<local_argument *>(&arg)) 1047a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez sz += arg.storage(); 105c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez } 106c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 107c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return sz; 108c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 109c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 110c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 11135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::mem_private() const { 112c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return 0; 113c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 114c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 115c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezconst std::string & 11635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::name() const { 1178e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez return _name; 118c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 119c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 120c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezstd::vector<size_t> 121bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerezkernel::optimal_block_size(const command_queue &q, 122bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez const std::vector<size_t> &grid_size) const { 123bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez return factor::find_grid_optimal_factor<size_t>( 124c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez q.device().max_threads_per_block(), q.device().max_block_size(), 125bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez grid_size); 126bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez} 127bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez 128bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerezstd::vector<size_t> 129bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerezkernel::required_block_size() const { 130c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return { 0, 0, 0 }; 131c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 132c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 1337a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::argument_range 1347a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::args() { 1357a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez return map(derefs(), _args); 1367a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez} 1377a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez 1387a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::const_argument_range 1397a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::args() const { 1407a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez return map(derefs(), _args); 1417a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez} 1427a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez 14335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezconst module & 14435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::module(const command_queue &q) const { 1451942490bae01d44a08f263ea2dc747d11c82acfeFrancisco Jerez return program().build(q.device()).binary; 146c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 147c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 14835307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::exec_context::exec_context(kernel &kern) : 1497a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez kern(kern), q(NULL), mem_local(0), st(NULL), cs() { 150c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 151c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 15235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::exec_context::~exec_context() { 153c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (st) 154c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez q->pipe->delete_compute_state(q->pipe, st); 155c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 156c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 157c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid * 158bf89a97748748592639087e8167e29c98c740d33Francisco Jerezkernel::exec_context::bind(intrusive_ptr<command_queue> _q, 159bf89a97748748592639087e8167e29c98c740d33Francisco Jerez const std::vector<size_t> &grid_offset) { 1608e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez std::swap(q, _q); 161c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 162a3dcab43c6b6fed2f35aa0e802be6398985f100cFrancisco Jerez // Bind kernel arguments. 1631942490bae01d44a08f263ea2dc747d11c82acfeFrancisco Jerez auto &m = kern.program().build(q->device()).binary; 1647a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez auto margs = find(name_equals(kern.name()), m.syms).args; 165cc495055cdfe7e39002180d095d09fe4b6905eb9Serge Martin auto msec = find(type_equals(module::section::text_executable), m.secs); 166bf89a97748748592639087e8167e29c98c740d33Francisco Jerez auto explicit_arg = kern._args.begin(); 1677a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez 168bf89a97748748592639087e8167e29c98c740d33Francisco Jerez for (auto &marg : margs) { 169bf89a97748748592639087e8167e29c98c740d33Francisco Jerez switch (marg.semantic) { 170bf89a97748748592639087e8167e29c98c740d33Francisco Jerez case module::argument::general: 171bf89a97748748592639087e8167e29c98c740d33Francisco Jerez (*(explicit_arg++))->bind(*this, marg); 1722286edce16e4f78500ccda77da99c30185d9c58fFrancisco Jerez break; 173bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 174bf89a97748748592639087e8167e29c98c740d33Francisco Jerez case module::argument::grid_dimension: { 175bf89a97748748592639087e8167e29c98c740d33Francisco Jerez const cl_uint dimension = grid_offset.size(); 176bf89a97748748592639087e8167e29c98c740d33Francisco Jerez auto arg = argument::create(marg); 177bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 178bf89a97748748592639087e8167e29c98c740d33Francisco Jerez arg->set(sizeof(dimension), &dimension); 179bf89a97748748592639087e8167e29c98c740d33Francisco Jerez arg->bind(*this, marg); 1802286edce16e4f78500ccda77da99c30185d9c58fFrancisco Jerez break; 181bf89a97748748592639087e8167e29c98c740d33Francisco Jerez } 182bf89a97748748592639087e8167e29c98c740d33Francisco Jerez case module::argument::grid_offset: { 18340c6d54e76c5e5859a78841ed305935b2ca6922cJan Vesely for (cl_uint x : pad_vector(*q, grid_offset, 0)) { 184bf89a97748748592639087e8167e29c98c740d33Francisco Jerez auto arg = argument::create(marg); 185bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 186bf89a97748748592639087e8167e29c98c740d33Francisco Jerez arg->set(sizeof(x), &x); 187bf89a97748748592639087e8167e29c98c740d33Francisco Jerez arg->bind(*this, marg); 188bf89a97748748592639087e8167e29c98c740d33Francisco Jerez } 1892286edce16e4f78500ccda77da99c30185d9c58fFrancisco Jerez break; 190bf89a97748748592639087e8167e29c98c740d33Francisco Jerez } 1919ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian case module::argument::image_size: { 1929ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get(); 1939ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian std::vector<cl_uint> image_size{ 1949ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian static_cast<cl_uint>(img->width()), 1959ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian static_cast<cl_uint>(img->height()), 1969ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian static_cast<cl_uint>(img->depth())}; 1979ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian for (auto x : image_size) { 1989ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian auto arg = argument::create(marg); 1999ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian 2009ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian arg->set(sizeof(x), &x); 2019ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian arg->bind(*this, marg); 2029ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian } 2039ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian break; 2049ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian } 2059ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian case module::argument::image_format: { 2069ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get(); 2079ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian cl_image_format fmt = img->format(); 2089ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian std::vector<cl_uint> image_format{ 2099ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian static_cast<cl_uint>(fmt.image_channel_data_type), 2109ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian static_cast<cl_uint>(fmt.image_channel_order)}; 2119ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian for (auto x : image_format) { 2129ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian auto arg = argument::create(marg); 2139ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian 2149ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian arg->set(sizeof(x), &x); 2159ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian arg->bind(*this, marg); 2169ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian } 2179ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian break; 2189ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian } 219bf89a97748748592639087e8167e29c98c740d33Francisco Jerez } 220bf89a97748748592639087e8167e29c98c740d33Francisco Jerez } 221c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 222c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez // Create a new compute state if anything changed. 2238e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez if (!st || q != _q || 224c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_local_mem != mem_local || 225c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_input_mem != input.size()) { 226c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (st) 2278e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _q->pipe->delete_compute_state(_q->pipe, st); 228c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 229ea8f4a6b13b94eb060bff4ccc6c13efc01d2b682Bas Nieuwenhuizen cs.ir_type = q->device().ir_format(); 230d8f817ae7f4241a9ea23140805aaeb724a0ac851Serge Martin cs.prog = &(msec.data[0]); 231c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_local_mem = mem_local; 232c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez cs.req_input_mem = input.size(); 233c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez st = q->pipe->create_compute_state(q->pipe, &cs); 234c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez } 235c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 236c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return st; 237c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 238c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 239c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 24035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::exec_context::unbind() { 2417a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez for (auto &arg : kern.args()) 2427a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez arg.unbind(*this); 243c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 244c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez input.clear(); 245c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez samplers.clear(); 246c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez sviews.clear(); 247c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez resources.clear(); 248c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez g_buffers.clear(); 249c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez g_handles.clear(); 250c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez mem_local = 0; 251c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 252c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 253829caf410e2c2c6f79902199da5a7900abc16129Francisco Jereznamespace { 254829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez template<typename T> 255829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez std::vector<uint8_t> 256829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez bytes(const T& x) { 257829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) }; 258829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez } 259829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez 260829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// 261829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// Transform buffer \a v from the native byte order into the byte 262829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// order specified by \a e. 263829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// 264829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez template<typename T> 265829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez void 266829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez byteswap(T &v, pipe_endian e) { 267829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez if (PIPE_ENDIAN_NATIVE != e) 268829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez std::reverse(v.begin(), v.end()); 269829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez } 270829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez 271df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez /// 272df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez /// Pad buffer \a v to the next multiple of \a n. 273df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez /// 274df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez template<typename T> 275df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez void 276df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez align(T &v, size_t n) { 277df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez v.resize(util_align_npot(v.size(), n)); 278df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez } 279df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez 280f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez bool 281f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez msb(const std::vector<uint8_t> &s) { 282f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE) 283f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez return s.back() & 0x80; 284f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez else 285f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez return s.front() & 0x80; 286f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez } 287f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez 288f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez /// 289f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez /// Resize buffer \a v to size \a n using sign or zero extension 290f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez /// according to \a ext. 291f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez /// 292f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez template<typename T> 293f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez void 29435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez extend(T &v, enum module::argument::ext_type ext, size_t n) { 295f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez const size_t m = std::min(v.size(), n); 296f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez const bool sign_ext = (ext == module::argument::sign_ext); 297f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0); 298f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez T w(n, fill); 299f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez 300f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE) 301f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez std::copy_n(v.begin(), m, w.begin()); 302f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez else 303f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez std::copy_n(v.end() - m, m, w.end() - m); 304f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez 305f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez std::swap(v, w); 306f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez } 307f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez 308829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// 309829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// Append buffer \a w to \a v. 310829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// 311829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez template<typename T> 312829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez void 313829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez insert(T &v, const T &w) { 314829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez v.insert(v.end(), w.begin(), w.end()); 315829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez } 316829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez 317829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// 318829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// Append \a n elements to the end of buffer \a v. 319829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez /// 320829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez template<typename T> 321829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez size_t 322829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez allocate(T &v, size_t n) { 323829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez size_t pos = v.size(); 324829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez v.resize(pos + n); 325829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez return pos; 326829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez } 327829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez} 328829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez 329bf89a97748748592639087e8167e29c98c740d33Francisco Jerezstd::unique_ptr<kernel::argument> 330bf89a97748748592639087e8167e29c98c740d33Francisco Jerezkernel::argument::create(const module::argument &marg) { 3313a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely switch (marg.type) { 3323a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::scalar: 3333a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely return std::unique_ptr<kernel::argument>(new scalar_argument(marg.size)); 334bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 3353a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::global: 3363a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely return std::unique_ptr<kernel::argument>(new global_argument); 337bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 3383a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::local: 3393a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely return std::unique_ptr<kernel::argument>(new local_argument); 340bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 3413a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::constant: 3423a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely return std::unique_ptr<kernel::argument>(new constant_argument); 343bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 3443a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::image2d_rd: 3453a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::image3d_rd: 3463a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely return std::unique_ptr<kernel::argument>(new image_rd_argument); 347bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 3483a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::image2d_wr: 3493a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::image3d_wr: 3503a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely return std::unique_ptr<kernel::argument>(new image_wr_argument); 351bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 3523a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely case module::argument::sampler: 3533a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely return std::unique_ptr<kernel::argument>(new sampler_argument); 354bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 3553a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely } 3563a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely throw error(CL_INVALID_KERNEL_DEFINITION); 357bf89a97748748592639087e8167e29c98c740d33Francisco Jerez} 358bf89a97748748592639087e8167e29c98c740d33Francisco Jerez 35935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::argument::argument() : _set(false) { 360c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 361c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 362c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezbool 36335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::argument::set() const { 3648e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez return _set; 365c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 366c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 367c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 36835307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::argument::storage() const { 369c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez return 0; 370c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 371c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 37235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::scalar_argument(size_t size) : size(size) { 373c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 374c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 375c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 37635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::set(size_t size, const void *value) { 377be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian if (!value) 378be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian throw error(CL_INVALID_ARG_VALUE); 379be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian 380a3dcab43c6b6fed2f35aa0e802be6398985f100cFrancisco Jerez if (size != this->size) 381c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 382c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 383c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez v = { (uint8_t *)value, (uint8_t *)value + size }; 3848e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _set = true; 385c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 386c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 387c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 38835307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::bind(exec_context &ctx, 38935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const module::argument &marg) { 390829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez auto w = v; 391829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez 392f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez extend(w, marg.ext_type, marg.target_size); 393c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez byteswap(w, ctx.q->device().endianness()); 394df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez align(ctx.input, marg.target_align); 395829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez insert(ctx.input, w); 396c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 397c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 398c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 39935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::unbind(exec_context &ctx) { 400c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 401c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 402c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 40335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::global_argument::set(size_t size, const void *value) { 404c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 405c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 406c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 4076ec210989fa10847091f06fcfcab77dd07618dffJan Vesely buf = pobj<buffer>(value ? *(cl_mem *)value : NULL); 4088e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _set = true; 409c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 410c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 411c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 41235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::global_argument::bind(exec_context &ctx, 41335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const module::argument &marg) { 414df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez align(ctx.input, marg.target_align); 4156ec210989fa10847091f06fcfcab77dd07618dffJan Vesely 4166ec210989fa10847091f06fcfcab77dd07618dffJan Vesely if (buf) { 417945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard const resource &r = buf->resource(*ctx.q); 418945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard ctx.g_handles.push_back(ctx.input.size()); 419945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard ctx.g_buffers.push_back(r.pipe); 420945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard 421945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard // How to handle multi-demensional offsets? 422945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard // We don't need to. Buffer offsets are always 423945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard // one-dimensional. 424945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard auto v = bytes(r.offset[0]); 425945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard extend(v, marg.ext_type, marg.target_size); 426945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard byteswap(v, ctx.q->device().endianness()); 427945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard insert(ctx.input, v); 4286ec210989fa10847091f06fcfcab77dd07618dffJan Vesely } else { 4296ec210989fa10847091f06fcfcab77dd07618dffJan Vesely // Null pointer. 4306ec210989fa10847091f06fcfcab77dd07618dffJan Vesely allocate(ctx.input, marg.target_size); 4316ec210989fa10847091f06fcfcab77dd07618dffJan Vesely } 432c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 433c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 434c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 43535307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::global_argument::unbind(exec_context &ctx) { 436c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 437c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 438c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t 43935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::storage() const { 4408e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez return _storage; 441c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 442c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 443c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 44435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::set(size_t size, const void *value) { 445c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (value) 446c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_VALUE); 447c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 448be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian if (!size) 449be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian throw error(CL_INVALID_ARG_SIZE); 450be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian 4518e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _storage = size; 4528e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _set = true; 453c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 454c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 455c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 45635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::bind(exec_context &ctx, 45735307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const module::argument &marg) { 458829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez auto v = bytes(ctx.mem_local); 459c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 460f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez extend(v, module::argument::zero_ext, marg.target_size); 461c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez byteswap(v, ctx.q->device().endianness()); 462df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez align(ctx.input, marg.target_align); 463829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez insert(ctx.input, v); 464c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 4658e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez ctx.mem_local += _storage; 466c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 467c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 468c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 46935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::unbind(exec_context &ctx) { 470c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 471c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 472c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 47335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::constant_argument::set(size_t size, const void *value) { 474c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 475c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 476c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 4776ec210989fa10847091f06fcfcab77dd07618dffJan Vesely buf = pobj<buffer>(value ? *(cl_mem *)value : NULL); 4788e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _set = true; 479c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 480c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 481c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 48235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::constant_argument::bind(exec_context &ctx, 48335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const module::argument &marg) { 484df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez align(ctx.input, marg.target_align); 485c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 4866ec210989fa10847091f06fcfcab77dd07618dffJan Vesely if (buf) { 487945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard resource &r = buf->resource(*ctx.q); 488945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard auto v = bytes(ctx.resources.size() << 24 | r.offset[0]); 4896ec210989fa10847091f06fcfcab77dd07618dffJan Vesely 4906ec210989fa10847091f06fcfcab77dd07618dffJan Vesely extend(v, module::argument::zero_ext, marg.target_size); 491c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez byteswap(v, ctx.q->device().endianness()); 4926ec210989fa10847091f06fcfcab77dd07618dffJan Vesely insert(ctx.input, v); 4936ec210989fa10847091f06fcfcab77dd07618dffJan Vesely 494945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard st = r.bind_surface(*ctx.q, false); 4956ec210989fa10847091f06fcfcab77dd07618dffJan Vesely ctx.resources.push_back(st); 4966ec210989fa10847091f06fcfcab77dd07618dffJan Vesely } else { 4976ec210989fa10847091f06fcfcab77dd07618dffJan Vesely // Null pointer. 4986ec210989fa10847091f06fcfcab77dd07618dffJan Vesely allocate(ctx.input, marg.target_size); 4996ec210989fa10847091f06fcfcab77dd07618dffJan Vesely } 500c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 501c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 502c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 50335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::constant_argument::unbind(exec_context &ctx) { 504198cd136b94b2ddfb8e2d50e567f3e391eb93915Francisco Jerez if (buf) 505198cd136b94b2ddfb8e2d50e567f3e391eb93915Francisco Jerez buf->resource(*ctx.q).unbind_surface(*ctx.q, st); 506c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 507c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 508c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 50935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_rd_argument::set(size_t size, const void *value) { 510be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian if (!value) 511be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian throw error(CL_INVALID_ARG_VALUE); 512be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian 513c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 514c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 515c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 516d6f7afc3ed41a94175a0fdf9cf9651750104974cFrancisco Jerez img = &obj<image>(*(cl_mem *)value); 5178e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _set = true; 518c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 519c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 520c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 52135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_rd_argument::bind(exec_context &ctx, 52235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const module::argument &marg) { 523829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez auto v = bytes(ctx.sviews.size()); 524c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 525f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez extend(v, module::argument::zero_ext, marg.target_size); 526c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez byteswap(v, ctx.q->device().endianness()); 527df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez align(ctx.input, marg.target_align); 528829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez insert(ctx.input, v); 529c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 53035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez st = img->resource(*ctx.q).bind_sampler_view(*ctx.q); 531829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez ctx.sviews.push_back(st); 532c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 533c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 534c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 53535307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_rd_argument::unbind(exec_context &ctx) { 53635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez img->resource(*ctx.q).unbind_sampler_view(*ctx.q, st); 537c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 538c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 539c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 54035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_wr_argument::set(size_t size, const void *value) { 541be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian if (!value) 542be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian throw error(CL_INVALID_ARG_VALUE); 543be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian 544c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_mem)) 545c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 546c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 547d6f7afc3ed41a94175a0fdf9cf9651750104974cFrancisco Jerez img = &obj<image>(*(cl_mem *)value); 5488e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _set = true; 549c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 550c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 551c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 55235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_wr_argument::bind(exec_context &ctx, 55335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const module::argument &marg) { 554829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez auto v = bytes(ctx.resources.size()); 555c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 556f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez extend(v, module::argument::zero_ext, marg.target_size); 557c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez byteswap(v, ctx.q->device().endianness()); 558df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez align(ctx.input, marg.target_align); 559829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez insert(ctx.input, v); 560c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 56135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez st = img->resource(*ctx.q).bind_surface(*ctx.q, true); 562829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez ctx.resources.push_back(st); 563c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 564c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 565c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 56635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_wr_argument::unbind(exec_context &ctx) { 56735307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez img->resource(*ctx.q).unbind_surface(*ctx.q, st); 568c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 569c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 570c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 57135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::sampler_argument::set(size_t size, const void *value) { 572be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian if (!value) 573be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian throw error(CL_INVALID_SAMPLER); 574be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian 575c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez if (size != sizeof(cl_sampler)) 576c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez throw error(CL_INVALID_ARG_SIZE); 577c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 57804d0ab9f6456229df1a83b0b1c133e1c458aedd2Francisco Jerez s = &obj(*(cl_sampler *)value); 5798e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez _set = true; 580c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 581c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 582c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 58335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::sampler_argument::bind(exec_context &ctx, 58435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez const module::argument &marg) { 58535307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez st = s->bind(*ctx.q); 586829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez ctx.samplers.push_back(st); 587c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 588c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez 589c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid 59035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::sampler_argument::unbind(exec_context &ctx) { 59135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez s->unbind(*ctx.q, st); 592c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez} 593