1//
2// Copyright 2012 Francisco Jerez
3//
4// Permission is hereby granted, free of charge, to any person obtaining a
5// copy of this software and associated documentation files (the "Software"),
6// to deal in the Software without restriction, including without limitation
7// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8// and/or sell copies of the Software, and to permit persons to whom the
9// Software is furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20// SOFTWARE.
21//
22
23#include "core/kernel.hpp"
24#include "core/resource.hpp"
25#include "pipe/p_context.h"
26
27using namespace clover;
28
29_cl_kernel::_cl_kernel(clover::program &prog,
30                       const std::string &name,
31                       const std::vector<clover::module::argument> &args) :
32   prog(prog), __name(name), exec(*this) {
33   for (auto arg : args) {
34      if (arg.type == module::argument::scalar)
35         this->args.emplace_back(new scalar_argument(arg.size));
36      else if (arg.type == module::argument::global)
37         this->args.emplace_back(new global_argument(arg.size));
38      else if (arg.type == module::argument::local)
39         this->args.emplace_back(new local_argument());
40      else if (arg.type == module::argument::constant)
41         this->args.emplace_back(new constant_argument());
42      else if (arg.type == module::argument::image2d_rd ||
43               arg.type == module::argument::image3d_rd)
44         this->args.emplace_back(new image_rd_argument());
45      else if (arg.type == module::argument::image2d_wr ||
46               arg.type == module::argument::image3d_wr)
47         this->args.emplace_back(new image_wr_argument());
48      else if (arg.type == module::argument::sampler)
49         this->args.emplace_back(new sampler_argument());
50      else
51         throw error(CL_INVALID_KERNEL_DEFINITION);
52   }
53}
54
55template<typename T, typename V>
56static inline std::vector<T>
57pad_vector(clover::command_queue &q, const V &v, T x) {
58   std::vector<T> w { v.begin(), v.end() };
59   w.resize(q.dev.max_block_size().size(), x);
60   return w;
61}
62
63void
64_cl_kernel::launch(clover::command_queue &q,
65                   const std::vector<size_t> &grid_offset,
66                   const std::vector<size_t> &grid_size,
67                   const std::vector<size_t> &block_size) {
68   void *st = exec.bind(&q);
69   auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; },
70                        exec.g_handles.begin(), exec.g_handles.end());
71
72   q.pipe->bind_compute_state(q.pipe, st);
73   q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(),
74                                       exec.samplers.data());
75   q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
76                                     exec.sviews.data());
77   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
78                                     exec.resources.data());
79   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
80                              exec.g_buffers.data(), g_handles.data());
81
82   q.pipe->launch_grid(q.pipe,
83                       pad_vector<uint>(q, block_size, 1).data(),
84                       pad_vector<uint>(q, grid_size, 1).data(),
85                       module(q).sym(__name).offset,
86                       exec.input.data());
87
88   q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
89   q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
90   q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
91   q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL);
92   exec.unbind();
93}
94
95size_t
96_cl_kernel::mem_local() const {
97   size_t sz = 0;
98
99   for (auto &arg : args) {
100      if (dynamic_cast<local_argument *>(arg.get()))
101         sz += arg->storage();
102   }
103
104   return sz;
105}
106
107size_t
108_cl_kernel::mem_private() const {
109   return 0;
110}
111
112size_t
113_cl_kernel::max_block_size() const {
114   return SIZE_MAX;
115}
116
117const std::string &
118_cl_kernel::name() const {
119   return __name;
120}
121
122std::vector<size_t>
123_cl_kernel::block_size() const {
124   return { 0, 0, 0 };
125}
126
127const clover::module &
128_cl_kernel::module(const clover::command_queue &q) const {
129   return prog.binaries().find(&q.dev)->second;
130}
131
132
133_cl_kernel::exec_context::exec_context(clover::kernel &kern) :
134   kern(kern), q(NULL), mem_local(0), st(NULL) {
135}
136
137_cl_kernel::exec_context::~exec_context() {
138   if (st)
139      q->pipe->delete_compute_state(q->pipe, st);
140}
141
142void *
143_cl_kernel::exec_context::bind(clover::command_queue *__q) {
144   std::swap(q, __q);
145
146   for (auto &arg : kern.args)
147      arg->bind(*this);
148
149   // Create a new compute state if anything changed.
150   if (!st || q != __q ||
151       cs.req_local_mem != mem_local ||
152       cs.req_input_mem != input.size()) {
153      if (st)
154         __q->pipe->delete_compute_state(__q->pipe, st);
155
156      cs.prog = kern.module(*q).sec(module::section::text).data.begin();
157      cs.req_local_mem = mem_local;
158      cs.req_input_mem = input.size();
159      st = q->pipe->create_compute_state(q->pipe, &cs);
160   }
161
162   return st;
163}
164
165void
166_cl_kernel::exec_context::unbind() {
167   for (auto &arg : kern.args)
168      arg->unbind(*this);
169
170   input.clear();
171   samplers.clear();
172   sviews.clear();
173   resources.clear();
174   g_buffers.clear();
175   g_handles.clear();
176   mem_local = 0;
177}
178
179_cl_kernel::argument::argument(size_t size) :
180   __size(size), __set(false) {
181}
182
183bool
184_cl_kernel::argument::set() const {
185   return __set;
186}
187
188size_t
189_cl_kernel::argument::storage() const {
190   return 0;
191}
192
193_cl_kernel::scalar_argument::scalar_argument(size_t size) :
194   argument(size) {
195}
196
197void
198_cl_kernel::scalar_argument::set(size_t size, const void *value) {
199   if (size != __size)
200      throw error(CL_INVALID_ARG_SIZE);
201
202   v = { (uint8_t *)value, (uint8_t *)value + size };
203   __set = true;
204}
205
206void
207_cl_kernel::scalar_argument::bind(exec_context &ctx) {
208   ctx.input.insert(ctx.input.end(), v.begin(), v.end());
209}
210
211void
212_cl_kernel::scalar_argument::unbind(exec_context &ctx) {
213}
214
215_cl_kernel::global_argument::global_argument(size_t size) :
216   argument(size) {
217}
218
219void
220_cl_kernel::global_argument::set(size_t size, const void *value) {
221   if (size != sizeof(cl_mem))
222      throw error(CL_INVALID_ARG_SIZE);
223
224   obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
225   __set = true;
226}
227
228void
229_cl_kernel::global_argument::bind(exec_context &ctx) {
230   size_t offset = ctx.input.size();
231   size_t idx = ctx.g_buffers.size();
232
233   ctx.input.resize(offset + __size);
234
235   ctx.g_buffers.resize(idx + 1);
236   ctx.g_buffers[idx] = obj->resource(ctx.q).pipe;
237
238   ctx.g_handles.resize(idx + 1);
239   ctx.g_handles[idx] = offset;
240}
241
242void
243_cl_kernel::global_argument::unbind(exec_context &ctx) {
244}
245
246_cl_kernel::local_argument::local_argument() :
247   argument(sizeof(uint32_t)) {
248}
249
250size_t
251_cl_kernel::local_argument::storage() const {
252   return __storage;
253}
254
255void
256_cl_kernel::local_argument::set(size_t size, const void *value) {
257   if (value)
258      throw error(CL_INVALID_ARG_VALUE);
259
260   __storage = size;
261   __set = true;
262}
263
264void
265_cl_kernel::local_argument::bind(exec_context &ctx) {
266   size_t offset = ctx.input.size();
267   size_t ptr = ctx.mem_local;
268
269   ctx.input.resize(offset + sizeof(uint32_t));
270   *(uint32_t *)&ctx.input[offset] = ptr;
271
272   ctx.mem_local += __storage;
273}
274
275void
276_cl_kernel::local_argument::unbind(exec_context &ctx) {
277}
278
279_cl_kernel::constant_argument::constant_argument() :
280   argument(sizeof(uint32_t)) {
281}
282
283void
284_cl_kernel::constant_argument::set(size_t size, const void *value) {
285   if (size != sizeof(cl_mem))
286      throw error(CL_INVALID_ARG_SIZE);
287
288   obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
289   __set = true;
290}
291
292void
293_cl_kernel::constant_argument::bind(exec_context &ctx) {
294   size_t offset = ctx.input.size();
295   size_t idx = ctx.resources.size();
296
297   ctx.input.resize(offset + sizeof(uint32_t));
298   *(uint32_t *)&ctx.input[offset] = idx << 24;
299
300   ctx.resources.resize(idx + 1);
301   ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false);
302}
303
304void
305_cl_kernel::constant_argument::unbind(exec_context &ctx) {
306   obj->resource(ctx.q).unbind_surface(*ctx.q, st);
307}
308
309_cl_kernel::image_rd_argument::image_rd_argument() :
310   argument(sizeof(uint32_t)) {
311}
312
313void
314_cl_kernel::image_rd_argument::set(size_t size, const void *value) {
315   if (size != sizeof(cl_mem))
316      throw error(CL_INVALID_ARG_SIZE);
317
318   obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
319   __set = true;
320}
321
322void
323_cl_kernel::image_rd_argument::bind(exec_context &ctx) {
324   size_t offset = ctx.input.size();
325   size_t idx = ctx.sviews.size();
326
327   ctx.input.resize(offset + sizeof(uint32_t));
328   *(uint32_t *)&ctx.input[offset] = idx;
329
330   ctx.sviews.resize(idx + 1);
331   ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q);
332}
333
334void
335_cl_kernel::image_rd_argument::unbind(exec_context &ctx) {
336   obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st);
337}
338
339_cl_kernel::image_wr_argument::image_wr_argument() :
340   argument(sizeof(uint32_t)) {
341}
342
343void
344_cl_kernel::image_wr_argument::set(size_t size, const void *value) {
345   if (size != sizeof(cl_mem))
346      throw error(CL_INVALID_ARG_SIZE);
347
348   obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
349   __set = true;
350}
351
352void
353_cl_kernel::image_wr_argument::bind(exec_context &ctx) {
354   size_t offset = ctx.input.size();
355   size_t idx = ctx.resources.size();
356
357   ctx.input.resize(offset + sizeof(uint32_t));
358   *(uint32_t *)&ctx.input[offset] = idx;
359
360   ctx.resources.resize(idx + 1);
361   ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true);
362}
363
364void
365_cl_kernel::image_wr_argument::unbind(exec_context &ctx) {
366   obj->resource(ctx.q).unbind_surface(*ctx.q, st);
367}
368
369_cl_kernel::sampler_argument::sampler_argument() :
370   argument(0) {
371}
372
373void
374_cl_kernel::sampler_argument::set(size_t size, const void *value) {
375   if (size != sizeof(cl_sampler))
376      throw error(CL_INVALID_ARG_SIZE);
377
378   obj = *(cl_sampler *)value;
379   __set = true;
380}
381
382void
383_cl_kernel::sampler_argument::bind(exec_context &ctx) {
384   size_t idx = ctx.samplers.size();
385
386   ctx.samplers.resize(idx + 1);
387   ctx.samplers[idx] = st = obj->bind(*ctx.q);
388}
389
390void
391_cl_kernel::sampler_argument::unbind(exec_context &ctx) {
392   obj->unbind(*ctx.q, st);
393}
394