//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
22c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
23c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "core/kernel.hpp"
24c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "core/resource.hpp"
25bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez#include "util/factor.hpp"
26df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez#include "util/u_math.h"
27c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez#include "pipe/p_context.h"
28c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
29c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezusing namespace clover;
30c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
31c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerezkernel::kernel(clover::program &prog, const std::string &name,
3235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez               const std::vector<module::argument> &margs) :
33e9a4e74926ab67d1750c39b49a54df6fbcb0b593Francisco Jerez   program(prog), _name(name), exec(*this),
34e9a4e74926ab67d1750c39b49a54df6fbcb0b593Francisco Jerez   program_ref(prog._kernel_ref_counter) {
357a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   for (auto &marg : margs) {
36bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      if (marg.semantic == module::argument::general)
37bf89a97748748592639087e8167e29c98c740d33Francisco Jerez         _args.emplace_back(argument::create(marg));
38c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   }
39c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
40c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
417a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jereztemplate<typename V>
427a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezstatic inline std::vector<uint>
437a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezpad_vector(command_queue &q, const V &v, uint x) {
447a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   std::vector<uint> w { v.begin(), v.end() };
45c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez   w.resize(q.device().max_block_size().size(), x);
46c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   return w;
47c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
48c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
49c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
5035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::launch(command_queue &q,
5135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez               const std::vector<size_t> &grid_offset,
5235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez               const std::vector<size_t> &grid_size,
5335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez               const std::vector<size_t> &block_size) {
541942490bae01d44a08f263ea2dc747d11c82acfeFrancisco Jerez   const auto m = program().build(q.device()).binary;
557a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   const auto reduced_grid_size =
567a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez      map(divides(), grid_size, block_size);
57bf89a97748748592639087e8167e29c98c740d33Francisco Jerez   void *st = exec.bind(&q, grid_offset);
584d02e91e4938c98bcf9d4e57ab2e5463bb42e836Hans de Goede   struct pipe_grid_info info = {};
597a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez
607a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   // The handles are created during exec_context::bind(), so we need make
617a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   // sure to call exec_context::bind() before retrieving them.
627d61769e447e47022bea5e9fd415344b45a5a050Francisco Jerez   std::vector<uint32_t *> g_handles = map([&](size_t h) {
637d61769e447e47022bea5e9fd415344b45a5a050Francisco Jerez         return (uint32_t *)&exec.input[h];
647d61769e447e47022bea5e9fd415344b45a5a050Francisco Jerez      }, exec.g_handles);
65c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
66c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   q.pipe->bind_compute_state(q.pipe, st);
6793e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
6893e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul                               0, exec.samplers.size(),
6993e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul                               exec.samplers.data());
7093e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul
712901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger   q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
722901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger                             exec.sviews.size(), exec.sviews.data());
73c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
7435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                                 exec.resources.data());
75c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
76c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez                              exec.g_buffers.data(), g_handles.data());
77c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
78bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset   // Fill information for the launch_grid() call.
79ef8e50a841e79597ca56ae081102119329fd154cHans de Goede   info.work_dim = grid_size.size();
80a4cff1859efedac37368c1ddc55b091b6cd3eb65Serge Martin   copy(pad_vector(q, block_size, 1), info.block);
81a4cff1859efedac37368c1ddc55b091b6cd3eb65Serge Martin   copy(pad_vector(q, reduced_grid_size, 1), info.grid);
82a4cff1859efedac37368c1ddc55b091b6cd3eb65Serge Martin   info.pc = find(name_equals(_name), m.syms).offset;
83bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset   info.input = exec.input.data();
84bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset
85bfd695e1d2975e5dd5363c2e7fcc3084a28457aaSamuel Pitoiset   q.pipe->launch_grid(q.pipe, &info);
86c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
87c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
88c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
892901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger   q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
902901e2efcd28c1041b2afc145812c0ab7f75bf2aDavid Heidelberger                             exec.sviews.size(), NULL);
9193e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul   q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
9293e6694f2cdc3a93d1f33d178639aadfa7edb431Brian Paul                               exec.samplers.size(), NULL);
93be5899dcf9a337548d8095a00060d4451b0df222Bas Nieuwenhuizen
94be5899dcf9a337548d8095a00060d4451b0df222Bas Nieuwenhuizen   q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER);
95c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   exec.unbind();
96c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
97c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
98c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t
9935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::mem_local() const {
100c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   size_t sz = 0;
101c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
1027a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   for (auto &arg : args()) {
1037a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez      if (dynamic_cast<local_argument *>(&arg))
1047a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez         sz += arg.storage();
105c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   }
106c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
107c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   return sz;
108c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
109c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
110c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t
11135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::mem_private() const {
112c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   return 0;
113c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
114c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
115c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezconst std::string &
11635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::name() const {
1178e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   return _name;
118c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
119c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
120c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezstd::vector<size_t>
121bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerezkernel::optimal_block_size(const command_queue &q,
122bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez                           const std::vector<size_t> &grid_size) const {
123bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez   return factor::find_grid_optimal_factor<size_t>(
124c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez      q.device().max_threads_per_block(), q.device().max_block_size(),
125bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez      grid_size);
126bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez}
127bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerez
128bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerezstd::vector<size_t>
129bf045bf9b409c47019fa7d9c859eaf8d50dd7032Francisco Jerezkernel::required_block_size() const {
130c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   return { 0, 0, 0 };
131c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
132c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
1337a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::argument_range
1347a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::args() {
1357a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   return map(derefs(), _args);
1367a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez}
1377a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez
1387a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::const_argument_range
1397a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerezkernel::args() const {
1407a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   return map(derefs(), _args);
1417a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez}
1427a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez
14335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezconst module &
14435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::module(const command_queue &q) const {
1451942490bae01d44a08f263ea2dc747d11c82acfeFrancisco Jerez   return program().build(q.device()).binary;
146c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
147c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
14835307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::exec_context::exec_context(kernel &kern) :
1497a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   kern(kern), q(NULL), mem_local(0), st(NULL), cs() {
150c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
151c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
15235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::exec_context::~exec_context() {
153c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   if (st)
154c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      q->pipe->delete_compute_state(q->pipe, st);
155c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
156c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
157c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid *
158bf89a97748748592639087e8167e29c98c740d33Francisco Jerezkernel::exec_context::bind(intrusive_ptr<command_queue> _q,
159bf89a97748748592639087e8167e29c98c740d33Francisco Jerez                           const std::vector<size_t> &grid_offset) {
1608e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   std::swap(q, _q);
161c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
162a3dcab43c6b6fed2f35aa0e802be6398985f100cFrancisco Jerez   // Bind kernel arguments.
1631942490bae01d44a08f263ea2dc747d11c82acfeFrancisco Jerez   auto &m = kern.program().build(q->device()).binary;
1647a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   auto margs = find(name_equals(kern.name()), m.syms).args;
165cc495055cdfe7e39002180d095d09fe4b6905eb9Serge Martin   auto msec = find(type_equals(module::section::text_executable), m.secs);
166bf89a97748748592639087e8167e29c98c740d33Francisco Jerez   auto explicit_arg = kern._args.begin();
1677a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez
168bf89a97748748592639087e8167e29c98c740d33Francisco Jerez   for (auto &marg : margs) {
169bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      switch (marg.semantic) {
170bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      case module::argument::general:
171bf89a97748748592639087e8167e29c98c740d33Francisco Jerez         (*(explicit_arg++))->bind(*this, marg);
1722286edce16e4f78500ccda77da99c30185d9c58fFrancisco Jerez         break;
173bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
174bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      case module::argument::grid_dimension: {
175bf89a97748748592639087e8167e29c98c740d33Francisco Jerez         const cl_uint dimension = grid_offset.size();
176bf89a97748748592639087e8167e29c98c740d33Francisco Jerez         auto arg = argument::create(marg);
177bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
178bf89a97748748592639087e8167e29c98c740d33Francisco Jerez         arg->set(sizeof(dimension), &dimension);
179bf89a97748748592639087e8167e29c98c740d33Francisco Jerez         arg->bind(*this, marg);
1802286edce16e4f78500ccda77da99c30185d9c58fFrancisco Jerez         break;
181bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      }
182bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      case module::argument::grid_offset: {
18340c6d54e76c5e5859a78841ed305935b2ca6922cJan Vesely         for (cl_uint x : pad_vector(*q, grid_offset, 0)) {
184bf89a97748748592639087e8167e29c98c740d33Francisco Jerez            auto arg = argument::create(marg);
185bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
186bf89a97748748592639087e8167e29c98c740d33Francisco Jerez            arg->set(sizeof(x), &x);
187bf89a97748748592639087e8167e29c98c740d33Francisco Jerez            arg->bind(*this, marg);
188bf89a97748748592639087e8167e29c98c740d33Francisco Jerez         }
1892286edce16e4f78500ccda77da99c30185d9c58fFrancisco Jerez         break;
190bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      }
1919ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian      case module::argument::image_size: {
1929ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
1939ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         std::vector<cl_uint> image_size{
1949ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian               static_cast<cl_uint>(img->width()),
1959ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian               static_cast<cl_uint>(img->height()),
1969ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian               static_cast<cl_uint>(img->depth())};
1979ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         for (auto x : image_size) {
1989ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian            auto arg = argument::create(marg);
1999ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian
2009ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian            arg->set(sizeof(x), &x);
2019ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian            arg->bind(*this, marg);
2029ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         }
2039ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         break;
2049ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian      }
2059ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian      case module::argument::image_format: {
2069ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
2079ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         cl_image_format fmt = img->format();
2089ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         std::vector<cl_uint> image_format{
2099ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian               static_cast<cl_uint>(fmt.image_channel_data_type),
2109ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian               static_cast<cl_uint>(fmt.image_channel_order)};
2119ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         for (auto x : image_format) {
2129ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian            auto arg = argument::create(marg);
2139ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian
2149ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian            arg->set(sizeof(x), &x);
2159ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian            arg->bind(*this, marg);
2169ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         }
2179ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian         break;
2189ef5b7a23348291893a6bf61fcce7a306e787addZoltan Gilian      }
219bf89a97748748592639087e8167e29c98c740d33Francisco Jerez      }
220bf89a97748748592639087e8167e29c98c740d33Francisco Jerez   }
221c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
222c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   // Create a new compute state if anything changed.
2238e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   if (!st || q != _q ||
224c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez       cs.req_local_mem != mem_local ||
225c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez       cs.req_input_mem != input.size()) {
226c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      if (st)
2278e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez         _q->pipe->delete_compute_state(_q->pipe, st);
228c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
229ea8f4a6b13b94eb060bff4ccc6c13efc01d2b682Bas Nieuwenhuizen      cs.ir_type = q->device().ir_format();
230d8f817ae7f4241a9ea23140805aaeb724a0ac851Serge Martin      cs.prog = &(msec.data[0]);
231c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      cs.req_local_mem = mem_local;
232c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      cs.req_input_mem = input.size();
233c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      st = q->pipe->create_compute_state(q->pipe, &cs);
234c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   }
235c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
236c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   return st;
237c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
238c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
239c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
24035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::exec_context::unbind() {
2417a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez   for (auto &arg : kern.args())
2427a9bbff7d641b82deae73e043fe1f02b7492993bFrancisco Jerez      arg.unbind(*this);
243c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
244c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   input.clear();
245c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   samplers.clear();
246c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   sviews.clear();
247c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   resources.clear();
248c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   g_buffers.clear();
249c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   g_handles.clear();
250c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   mem_local = 0;
251c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
252c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
253829caf410e2c2c6f79902199da5a7900abc16129Francisco Jereznamespace {
254829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   template<typename T>
255829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   std::vector<uint8_t>
256829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   bytes(const T& x) {
257829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez      return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) };
258829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   }
259829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez
260829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ///
261829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   /// Transform buffer \a v from the native byte order into the byte
262829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   /// order specified by \a e.
263829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ///
264829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   template<typename T>
265829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   void
266829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   byteswap(T &v, pipe_endian e) {
267829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez      if (PIPE_ENDIAN_NATIVE != e)
268829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez         std::reverse(v.begin(), v.end());
269829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   }
270829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez
271df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   ///
272df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   /// Pad buffer \a v to the next multiple of \a n.
273df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   ///
274df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   template<typename T>
275df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   void
276df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   align(T &v, size_t n) {
277df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez      v.resize(util_align_npot(v.size(), n));
278df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   }
279df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez
280f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   bool
281f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   msb(const std::vector<uint8_t> &s) {
282f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
283f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez         return s.back() & 0x80;
284f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      else
285f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez         return s.front() & 0x80;
286f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   }
287f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez
288f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   ///
289f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   /// Resize buffer \a v to size \a n using sign or zero extension
290f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   /// according to \a ext.
291f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   ///
292f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   template<typename T>
293f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   void
29435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez   extend(T &v, enum module::argument::ext_type ext, size_t n) {
295f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      const size_t m = std::min(v.size(), n);
296f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      const bool sign_ext = (ext == module::argument::sign_ext);
297f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
298f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      T w(n, fill);
299f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez
300f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
301f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez         std::copy_n(v.begin(), m, w.begin());
302f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      else
303f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez         std::copy_n(v.end() - m, m, w.end() - m);
304f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez
305f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez      std::swap(v, w);
306f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   }
307f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez
308829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ///
309829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   /// Append buffer \a w to \a v.
310829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ///
311829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   template<typename T>
312829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   void
313829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   insert(T &v, const T &w) {
314829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez      v.insert(v.end(), w.begin(), w.end());
315829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   }
316829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez
317829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ///
318829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   /// Append \a n elements to the end of buffer \a v.
319829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ///
320829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   template<typename T>
321829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   size_t
322829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   allocate(T &v, size_t n) {
323829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez      size_t pos = v.size();
324829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez      v.resize(pos + n);
325829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez      return pos;
326829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   }
327829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez}
328829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez
329bf89a97748748592639087e8167e29c98c740d33Francisco Jerezstd::unique_ptr<kernel::argument>
330bf89a97748748592639087e8167e29c98c740d33Francisco Jerezkernel::argument::create(const module::argument &marg) {
3313a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   switch (marg.type) {
3323a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::scalar:
3333a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely      return std::unique_ptr<kernel::argument>(new scalar_argument(marg.size));
334bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
3353a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::global:
3363a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely      return std::unique_ptr<kernel::argument>(new global_argument);
337bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
3383a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::local:
3393a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely      return std::unique_ptr<kernel::argument>(new local_argument);
340bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
3413a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::constant:
3423a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely      return std::unique_ptr<kernel::argument>(new constant_argument);
343bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
3443a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::image2d_rd:
3453a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::image3d_rd:
3463a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely      return std::unique_ptr<kernel::argument>(new image_rd_argument);
347bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
3483a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::image2d_wr:
3493a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::image3d_wr:
3503a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely      return std::unique_ptr<kernel::argument>(new image_wr_argument);
351bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
3523a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   case module::argument::sampler:
3533a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely      return std::unique_ptr<kernel::argument>(new sampler_argument);
354bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
3553a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   }
3563a18fc60581ad27811b0b4b22fce51da0ae8a008Jan Vesely   throw error(CL_INVALID_KERNEL_DEFINITION);
357bf89a97748748592639087e8167e29c98c740d33Francisco Jerez}
358bf89a97748748592639087e8167e29c98c740d33Francisco Jerez
35935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::argument::argument() : _set(false) {
360c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
361c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
362c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezbool
36335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::argument::set() const {
3648e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   return _set;
365c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
366c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
367c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t
36835307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::argument::storage() const {
369c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   return 0;
370c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
371c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
37235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::scalar_argument(size_t size) : size(size) {
373c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
374c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
375c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
37635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::set(size_t size, const void *value) {
377be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian   if (!value)
378be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian      throw error(CL_INVALID_ARG_VALUE);
379be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian
380a3dcab43c6b6fed2f35aa0e802be6398985f100cFrancisco Jerez   if (size != this->size)
381c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      throw error(CL_INVALID_ARG_SIZE);
382c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
383c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   v = { (uint8_t *)value, (uint8_t *)value + size };
3848e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _set = true;
385c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
386c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
387c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
38835307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::bind(exec_context &ctx,
38935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                              const module::argument &marg) {
390829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   auto w = v;
391829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez
392f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   extend(w, marg.ext_type, marg.target_size);
393c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez   byteswap(w, ctx.q->device().endianness());
394df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   align(ctx.input, marg.target_align);
395829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   insert(ctx.input, w);
396c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
397c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
398c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
39935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::scalar_argument::unbind(exec_context &ctx) {
400c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
401c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
402c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
40335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::global_argument::set(size_t size, const void *value) {
404c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   if (size != sizeof(cl_mem))
405c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      throw error(CL_INVALID_ARG_SIZE);
406c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
4076ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
4088e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _set = true;
409c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
410c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
411c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
41235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::global_argument::bind(exec_context &ctx,
41335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                              const module::argument &marg) {
414df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   align(ctx.input, marg.target_align);
4156ec210989fa10847091f06fcfcab77dd07618dffJan Vesely
4166ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   if (buf) {
417945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      const resource &r = buf->resource(*ctx.q);
418945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      ctx.g_handles.push_back(ctx.input.size());
419945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      ctx.g_buffers.push_back(r.pipe);
420945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard
421945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      // How to handle multi-demensional offsets?
422945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      // We don't need to.  Buffer offsets are always
423945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      // one-dimensional.
424945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      auto v = bytes(r.offset[0]);
425945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      extend(v, marg.ext_type, marg.target_size);
426945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      byteswap(v, ctx.q->device().endianness());
427945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      insert(ctx.input, v);
4286ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   } else {
4296ec210989fa10847091f06fcfcab77dd07618dffJan Vesely      // Null pointer.
4306ec210989fa10847091f06fcfcab77dd07618dffJan Vesely      allocate(ctx.input, marg.target_size);
4316ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   }
432c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
433c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
434c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
43535307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::global_argument::unbind(exec_context &ctx) {
436c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
437c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
438c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezsize_t
43935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::storage() const {
4408e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   return _storage;
441c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
442c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
443c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
44435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::set(size_t size, const void *value) {
445c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   if (value)
446c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      throw error(CL_INVALID_ARG_VALUE);
447c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
448be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian   if (!size)
449be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian      throw error(CL_INVALID_ARG_SIZE);
450be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian
4518e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _storage = size;
4528e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _set = true;
453c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
454c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
455c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
45635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::bind(exec_context &ctx,
45735307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                             const module::argument &marg) {
458829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   auto v = bytes(ctx.mem_local);
459c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
460f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   extend(v, module::argument::zero_ext, marg.target_size);
461c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez   byteswap(v, ctx.q->device().endianness());
462df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   align(ctx.input, marg.target_align);
463829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   insert(ctx.input, v);
464c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
4658e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   ctx.mem_local += _storage;
466c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
467c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
468c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
46935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::local_argument::unbind(exec_context &ctx) {
470c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
471c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
472c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
47335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::constant_argument::set(size_t size, const void *value) {
474c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   if (size != sizeof(cl_mem))
475c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      throw error(CL_INVALID_ARG_SIZE);
476c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
4776ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
4788e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _set = true;
479c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
480c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
481c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
48235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::constant_argument::bind(exec_context &ctx,
48335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                                const module::argument &marg) {
484df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   align(ctx.input, marg.target_align);
485c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
4866ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   if (buf) {
487945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      resource &r = buf->resource(*ctx.q);
488945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      auto v = bytes(ctx.resources.size() << 24 | r.offset[0]);
4896ec210989fa10847091f06fcfcab77dd07618dffJan Vesely
4906ec210989fa10847091f06fcfcab77dd07618dffJan Vesely      extend(v, module::argument::zero_ext, marg.target_size);
491c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez      byteswap(v, ctx.q->device().endianness());
4926ec210989fa10847091f06fcfcab77dd07618dffJan Vesely      insert(ctx.input, v);
4936ec210989fa10847091f06fcfcab77dd07618dffJan Vesely
494945d87f95877e198fb2203f47a4ebbccae883978Tom Stellard      st = r.bind_surface(*ctx.q, false);
4956ec210989fa10847091f06fcfcab77dd07618dffJan Vesely      ctx.resources.push_back(st);
4966ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   } else {
4976ec210989fa10847091f06fcfcab77dd07618dffJan Vesely      // Null pointer.
4986ec210989fa10847091f06fcfcab77dd07618dffJan Vesely      allocate(ctx.input, marg.target_size);
4996ec210989fa10847091f06fcfcab77dd07618dffJan Vesely   }
500c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
501c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
502c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
50335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::constant_argument::unbind(exec_context &ctx) {
504198cd136b94b2ddfb8e2d50e567f3e391eb93915Francisco Jerez   if (buf)
505198cd136b94b2ddfb8e2d50e567f3e391eb93915Francisco Jerez      buf->resource(*ctx.q).unbind_surface(*ctx.q, st);
506c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
507c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
508c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
50935307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_rd_argument::set(size_t size, const void *value) {
510be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian   if (!value)
511be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian      throw error(CL_INVALID_ARG_VALUE);
512be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian
513c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   if (size != sizeof(cl_mem))
514c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      throw error(CL_INVALID_ARG_SIZE);
515c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
516d6f7afc3ed41a94175a0fdf9cf9651750104974cFrancisco Jerez   img = &obj<image>(*(cl_mem *)value);
5178e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _set = true;
518c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
519c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
520c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
52135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_rd_argument::bind(exec_context &ctx,
52235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                                const module::argument &marg) {
523829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   auto v = bytes(ctx.sviews.size());
524c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
525f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   extend(v, module::argument::zero_ext, marg.target_size);
526c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez   byteswap(v, ctx.q->device().endianness());
527df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   align(ctx.input, marg.target_align);
528829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   insert(ctx.input, v);
529c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
53035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez   st = img->resource(*ctx.q).bind_sampler_view(*ctx.q);
531829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ctx.sviews.push_back(st);
532c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
533c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
534c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
53535307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_rd_argument::unbind(exec_context &ctx) {
53635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez   img->resource(*ctx.q).unbind_sampler_view(*ctx.q, st);
537c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
538c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
539c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
54035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_wr_argument::set(size_t size, const void *value) {
541be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian   if (!value)
542be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian      throw error(CL_INVALID_ARG_VALUE);
543be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian
544c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   if (size != sizeof(cl_mem))
545c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      throw error(CL_INVALID_ARG_SIZE);
546c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
547d6f7afc3ed41a94175a0fdf9cf9651750104974cFrancisco Jerez   img = &obj<image>(*(cl_mem *)value);
5488e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _set = true;
549c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
550c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
551c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
55235307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_wr_argument::bind(exec_context &ctx,
55335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                                const module::argument &marg) {
554829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   auto v = bytes(ctx.resources.size());
555c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
556f64c0ca692d3e8c78dd9ae1f015f58f1dfc1c760Francisco Jerez   extend(v, module::argument::zero_ext, marg.target_size);
557c4578d2277155c50c8680849763850cddb8e8ec2Francisco Jerez   byteswap(v, ctx.q->device().endianness());
558df530829f757a8968389427eb26f45a0d46623faFrancisco Jerez   align(ctx.input, marg.target_align);
559829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   insert(ctx.input, v);
560c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
56135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez   st = img->resource(*ctx.q).bind_surface(*ctx.q, true);
562829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ctx.resources.push_back(st);
563c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
564c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
565c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
56635307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::image_wr_argument::unbind(exec_context &ctx) {
56735307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez   img->resource(*ctx.q).unbind_surface(*ctx.q, st);
568c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
569c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
570c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
57135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::sampler_argument::set(size_t size, const void *value) {
572be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian   if (!value)
573be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian      throw error(CL_INVALID_SAMPLER);
574be3622dce383cb930a233b88bb056adb026dce1fZoltan Gilian
575c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez   if (size != sizeof(cl_sampler))
576c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez      throw error(CL_INVALID_ARG_SIZE);
577c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
57804d0ab9f6456229df1a83b0b1c133e1c458aedd2Francisco Jerez   s = &obj(*(cl_sampler *)value);
5798e14b82fd2c561cac3e0d5f84d5f67815ad53867Francisco Jerez   _set = true;
580c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
581c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
582c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
58335307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::sampler_argument::bind(exec_context &ctx,
58435307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez                               const module::argument &marg) {
58535307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez   st = s->bind(*ctx.q);
586829caf410e2c2c6f79902199da5a7900abc16129Francisco Jerez   ctx.samplers.push_back(st);
587c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
588c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez
589c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerezvoid
59035307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerezkernel::sampler_argument::unbind(exec_context &ctx) {
59135307f540fedf9680ce8b05d0784c5b0d5b0f6a7Francisco Jerez   s->unbind(*ctx.q, st);
592c6db1b3396384186aab5b685fe1fd540e17b3a62Francisco Jerez}
593