kernel.cpp revision fcab4d4a34a28ec153612e41d5850884c5ccbfb9
1//
2// Copyright 2012 Francisco Jerez
3//
4// Permission is hereby granted, free of charge, to any person obtaining a
5// copy of this software and associated documentation files (the "Software"),
6// to deal in the Software without restriction, including without limitation
7// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8// and/or sell copies of the Software, and to permit persons to whom the
9// Software is furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20// SOFTWARE.
21//
22
23#include "api/util.hpp"
24#include "core/kernel.hpp"
25#include "core/event.hpp"
26
27using namespace clover;
28
29PUBLIC cl_kernel
30clCreateKernel(cl_program prog, const char *name,
31               cl_int *errcode_ret) try {
32   if (!prog)
33      throw error(CL_INVALID_PROGRAM);
34
35   if (!name)
36      throw error(CL_INVALID_VALUE);
37
38   if (prog->binaries().empty())
39      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
40
41   auto sym = prog->binaries().begin()->second.sym(name);
42
43   ret_error(errcode_ret, CL_SUCCESS);
44   return new kernel(*prog, name, { sym.args.begin(), sym.args.end() });
45
46} catch (module::noent_error &e) {
47   ret_error(errcode_ret, CL_INVALID_KERNEL_NAME);
48   return NULL;
49
50} catch(error &e) {
51   ret_error(errcode_ret, e);
52   return NULL;
53}
54
55PUBLIC cl_int
56clCreateKernelsInProgram(cl_program prog, cl_uint count,
57                         cl_kernel *kerns, cl_uint *count_ret) {
58   if (!prog)
59      throw error(CL_INVALID_PROGRAM);
60
61   if (prog->binaries().empty())
62      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
63
64   auto &syms = prog->binaries().begin()->second.syms;
65
66   if (kerns && count < syms.size())
67      throw error(CL_INVALID_VALUE);
68
69   if (kerns)
70      std::transform(syms.begin(), syms.end(), kerns,
71                     [=](const module::symbol &sym) {
72                        return new kernel(*prog, compat::string(sym.name),
73                                          { sym.args.begin(), sym.args.end() });
74                     });
75
76   if (count_ret)
77      *count_ret = syms.size();
78
79   return CL_SUCCESS;
80}
81
82PUBLIC cl_int
83clRetainKernel(cl_kernel kern) {
84   if (!kern)
85      return CL_INVALID_KERNEL;
86
87   kern->retain();
88   return CL_SUCCESS;
89}
90
91PUBLIC cl_int
92clReleaseKernel(cl_kernel kern) {
93   if (!kern)
94      return CL_INVALID_KERNEL;
95
96   if (kern->release())
97      delete kern;
98
99   return CL_SUCCESS;
100}
101
102PUBLIC cl_int
103clSetKernelArg(cl_kernel kern, cl_uint idx, size_t size,
104               const void *value) try {
105   if (!kern)
106      throw error(CL_INVALID_KERNEL);
107
108   if (idx >= kern->args.size())
109      throw error(CL_INVALID_ARG_INDEX);
110
111   kern->args[idx]->set(size, value);
112
113   return CL_SUCCESS;
114
115} catch(error &e) {
116   return e.get();
117}
118
119PUBLIC cl_int
120clGetKernelInfo(cl_kernel kern, cl_kernel_info param,
121                size_t size, void *buf, size_t *size_ret) {
122   if (!kern)
123      return CL_INVALID_KERNEL;
124
125   switch (param) {
126   case CL_KERNEL_FUNCTION_NAME:
127      return string_property(buf, size, size_ret, kern->name());
128
129   case CL_KERNEL_NUM_ARGS:
130      return scalar_property<cl_uint>(buf, size, size_ret,
131                                      kern->args.size());
132
133   case CL_KERNEL_REFERENCE_COUNT:
134      return scalar_property<cl_uint>(buf, size, size_ret,
135                                      kern->ref_count());
136
137   case CL_KERNEL_CONTEXT:
138      return scalar_property<cl_context>(buf, size, size_ret,
139                                         &kern->prog.ctx);
140
141   case CL_KERNEL_PROGRAM:
142      return scalar_property<cl_program>(buf, size, size_ret,
143                                         &kern->prog);
144
145   default:
146      return CL_INVALID_VALUE;
147   }
148}
149
150PUBLIC cl_int
151clGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev,
152                         cl_kernel_work_group_info param,
153                         size_t size, void *buf, size_t *size_ret) {
154   if (!kern)
155      return CL_INVALID_KERNEL;
156
157   if ((!dev && kern->prog.binaries().size() != 1) ||
158       (dev && !kern->prog.binaries().count(dev)))
159      return CL_INVALID_DEVICE;
160
161   switch (param) {
162   case CL_KERNEL_WORK_GROUP_SIZE:
163      return scalar_property<size_t>(buf, size, size_ret,
164                                     kern->max_block_size());
165
166   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
167      return vector_property<size_t>(buf, size, size_ret,
168                                     kern->block_size());
169
170   case CL_KERNEL_LOCAL_MEM_SIZE:
171      return scalar_property<cl_ulong>(buf, size, size_ret,
172                                       kern->mem_local());
173
174   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
175      return scalar_property<size_t>(buf, size, size_ret, 1);
176
177   case CL_KERNEL_PRIVATE_MEM_SIZE:
178      return scalar_property<cl_ulong>(buf, size, size_ret,
179                                       kern->mem_private());
180
181   default:
182      return CL_INVALID_VALUE;
183   }
184}
185
namespace {
   ///
   /// Common argument checking shared by kernel invocation commands.
   ///
   /// Throws the appropriate clover::error if any of the queue, kernel,
   /// dependency or work-size arguments is invalid; returns normally
   /// otherwise.  The checks are ordered deliberately, so that the
   /// error code returned when several arguments are bad is stable.
   ///
   /// NOTE(review): grid_offset and ev are accepted but currently
   /// unused by the checks below.
   ///
   void
   kernel_validate(cl_command_queue q, cl_kernel kern,
                   cl_uint dims, const size_t *grid_offset,
                   const size_t *grid_size, const size_t *block_size,
                   cl_uint num_deps, const cl_event *deps,
                   cl_event *ev) {
      if (!q)
         throw error(CL_INVALID_COMMAND_QUEUE);

      if (!kern)
         throw error(CL_INVALID_KERNEL);

      // The kernel and every dependency event must belong to the same
      // context as the command queue.
      if (&kern->prog.ctx != &q->ctx ||
          any_of([&](const cl_event ev) {
                return &ev->ctx != &q->ctx;
             }, deps, deps + num_deps))
         throw error(CL_INVALID_CONTEXT);

      // A non-zero dependency count requires a non-null list (and vice
      // versa), and null event handles are not allowed in the list.
      if (bool(num_deps) != bool(deps) ||
          any_of(is_zero<cl_event>(), deps, deps + num_deps))
         throw error(CL_INVALID_EVENT_WAIT_LIST);

      // Every kernel argument must have been set with clSetKernelArg.
      if (any_of([](std::unique_ptr<kernel::argument> &arg) {
               return !arg->set();
            }, kern->args.begin(), kern->args.end()))
         throw error(CL_INVALID_KERNEL_ARGS);

      // The program must have a binary for the queue's device.
      if (!kern->prog.binaries().count(&q->dev))
         throw error(CL_INVALID_PROGRAM_EXECUTABLE);

      // The dimensionality must be within the device's limit.
      if (dims < 1 || dims > q->dev.max_block_size().size())
         throw error(CL_INVALID_WORK_DIMENSION);

      // The grid size is mandatory and may not be zero in any dimension.
      if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims))
         throw error(CL_INVALID_GLOBAL_WORK_SIZE);

      if (block_size) {
         // Each block dimension must be non-zero and no larger than the
         // device limit for that dimension.
         if (any_of([](size_t b, size_t max) {
                  return b == 0 || b > max;
               }, block_size, block_size + dims,
               q->dev.max_block_size().begin()))
            throw error(CL_INVALID_WORK_ITEM_SIZE);

         // The grid size must be an exact multiple of the block size in
         // every dimension.
         if (any_of([](size_t b, size_t g) {
                  return g % b;
               }, block_size, block_size + dims, grid_size))
            throw error(CL_INVALID_WORK_GROUP_SIZE);

         // The total number of threads per block may not exceed the
         // device limit.
         if (fold(std::multiplies<size_t>(), 1u,
                  block_size, block_size + dims) >
             q->dev.max_threads_per_block())
            throw error(CL_INVALID_WORK_GROUP_SIZE);
      }
   }

   ///
   /// Common event action shared by kernel invocation commands.
   ///
   /// Returns a closure that launches the kernel on the given queue.
   /// The grid size is divided element-wise by the block size before
   /// the launch — presumably launch() expects the grid expressed in
   /// blocks rather than threads; confirm against kernel::launch.
   ///
   /// NOTE(review): this assumes block_size has at least as many
   /// elements as grid_size and contains no zeros.  kernel_validate
   /// guarantees that only when the caller supplied an explicit block
   /// size — verify the null-block-size path (see opt_vector below).
   ///
   std::function<void (event &)>
   kernel_op(cl_command_queue q, cl_kernel kern,
             const std::vector<size_t> &grid_offset,
             const std::vector<size_t> &grid_size,
             const std::vector<size_t> &block_size) {
      const std::vector<size_t> reduced_grid_size = map(
         std::divides<size_t>(), grid_size.begin(), grid_size.end(),
         block_size.begin());

      // Capture by copy: the closure outlives this stack frame and is
      // run later when the event triggers.
      return [=](event &) {
         kern->launch(*q, grid_offset, reduced_grid_size, block_size);
      };
   }

   ///
   /// Build a vector from the array \a p of \a n elements, or a
   /// fallback vector if \a p is null.
   ///
   /// NOTE(review): when p is null this returns a *single-element*
   /// vector whose value is n (initializer-list construction), not a
   /// vector of n elements — this looks like the classic vector
   /// brace-init pitfall; confirm the intended default (e.g. n zeros
   /// for a null grid offset).
   ///
   template<typename T, typename S>
   std::vector<T>
   opt_vector(const T *p, S n) {
      if (p)
         return { p, p + n };
      else
         return { n };
   }
}
271
PUBLIC cl_int
clEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern,
                       cl_uint dims, const size_t *pgrid_offset,
                       const size_t *pgrid_size, const size_t *pblock_size,
                       cl_uint num_deps, const cl_event *deps,
                       cl_event *ev) try {
   // Materialize the caller's raw arrays as vectors.  opt_vector
   // substitutes a fallback when a pointer is null (a null
   // pgrid_offset or pblock_size is legal per the CL API).
   // NOTE(review): that fallback is the single-element vector {dims};
   // for dims > 1 with a null pblock_size, block_size ends up shorter
   // than grid_size and kernel_op reads past its end — confirm how a
   // null local work size is meant to be handled here.
   const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims);
   const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims);
   const std::vector<size_t> block_size = opt_vector(pblock_size, dims);

   // Validate against the original pointers so null-ness is visible to
   // the checks (e.g. a null pgrid_size must fail).
   kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size,
                   num_deps, deps, ev);

   // Create the event that will perform the launch when triggered.
   hard_event *hev = new hard_event(
      *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps },
      kernel_op(q, kern, grid_offset, grid_size, block_size));

   ret_object(ev, hev);
   return CL_SUCCESS;

} catch(error &e) {
   return e.get();
}
295
296PUBLIC cl_int
297clEnqueueTask(cl_command_queue q, cl_kernel kern,
298              cl_uint num_deps, const cl_event *deps,
299              cl_event *ev) try {
300   const std::vector<size_t> grid_offset = { 0 };
301   const std::vector<size_t> grid_size = { 1 };
302   const std::vector<size_t> block_size = { 1 };
303
304   kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(),
305                   block_size.data(), num_deps, deps, ev);
306
307   hard_event *hev = new hard_event(
308      *q, CL_COMMAND_TASK, { deps, deps + num_deps },
309      kernel_op(q, kern, grid_offset, grid_size, block_size));
310
311   ret_object(ev, hev);
312   return CL_SUCCESS;
313
314} catch(error &e) {
315   return e.get();
316}
317
PUBLIC cl_int
clEnqueueNativeKernel(cl_command_queue q, void (*func)(void *),
                      void *args, size_t args_size,
                      cl_uint obj_count, const cl_mem *obj_list,
                      const void **obj_args, cl_uint num_deps,
                      const cl_event *deps, cl_event *ev) {
   // Native (host-function) kernels are not supported by this
   // implementation, so the call is rejected unconditionally.
   return CL_INVALID_OPERATION;
}
326