1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Copyright 2012 Francisco Jerez 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Permission is hereby granted, free of charge, to any person obtaining a 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// copy of this software and associated documentation files (the "Software"), 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// to deal in the Software without restriction, including without limitation 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// the rights to use, copy, modify, merge, publish, distribute, sublicense, 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// and/or sell copies of the Software, and to permit persons to whom the 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Software is furnished to do so, subject to the following conditions: 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// The above copyright notice and this permission notice shall be included in 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// all copies or substantial portions of the Software. 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// SOFTWARE. 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "api/util.hpp" 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "core/kernel.hpp" 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "core/event.hpp" 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace clover; 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_kernel 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclCreateKernel(cl_program prog, const char *name, 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_int *errcode_ret) try { 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog) 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_PROGRAM); 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!name) 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_VALUE); 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prog->binaries().empty()) 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_PROGRAM_EXECUTABLE); 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org auto sym = prog->binaries().begin()->second.sym(name); 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_error(errcode_ret, CL_SUCCESS); 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return new kernel(*prog, name, { sym.args.begin(), sym.args.end() }); 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} catch (module::noent_error &e) { 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_error(errcode_ret, CL_INVALID_KERNEL_NAME); 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} catch(error &e) { 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_error(errcode_ret, e); 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return NULL; 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclCreateKernelsInProgram(cl_program prog, cl_uint count, 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_kernel *kerns, cl_uint *count_ret) { 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!prog) 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_PROGRAM); 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (prog->binaries().empty()) 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_PROGRAM_EXECUTABLE); 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org auto &syms = prog->binaries().begin()->second.syms; 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (kerns && count < syms.size()) 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_VALUE); 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (kerns) 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::transform(syms.begin(), syms.end(), kerns, 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org [=](const module::symbol &sym) { 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return new kernel(*prog, compat::string(sym.name), 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org { sym.args.begin(), sym.args.end() }); 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }); 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (count_ret) 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *count_ret = syms.size(); 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_SUCCESS; 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclRetainKernel(cl_kernel kern) { 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!kern) 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_KERNEL; 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->retain(); 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_SUCCESS; 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclReleaseKernel(cl_kernel kern) { 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!kern) 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_KERNEL; 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (kern->release()) 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org delete kern; 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_SUCCESS; 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclSetKernelArg(cl_kernel kern, cl_uint idx, size_t size, 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const void *value) try { 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!kern) 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_KERNEL); 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (idx >= kern->args.size()) 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_ARG_INDEX); 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->args[idx]->set(size, value); 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_SUCCESS; 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} catch(error &e) { 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return e.get(); 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclGetKernelInfo(cl_kernel kern, cl_kernel_info param, 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t size, void *buf, size_t *size_ret) { 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!kern) 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_KERNEL; 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (param) { 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_FUNCTION_NAME: 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return string_property(buf, size, size_ret, kern->name()); 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_NUM_ARGS: 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<cl_uint>(buf, size, size_ret, 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->args.size()); 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_REFERENCE_COUNT: 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<cl_uint>(buf, size, size_ret, 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->ref_count()); 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_CONTEXT: 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<cl_context>(buf, size, size_ret, 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &kern->prog.ctx); 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_PROGRAM: 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<cl_program>(buf, size, size_ret, 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org &kern->prog); 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_VALUE; 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclGetKernelWorkGroupInfo(cl_kernel kern, cl_device_id dev, 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_kernel_work_group_info param, 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org size_t size, void *buf, size_t *size_ret) { 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!kern) 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_KERNEL; 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((!dev && kern->prog.binaries().size() != 1) || 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org (dev && !kern->prog.binaries().count(dev))) 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_DEVICE; 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (param) { 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_WORK_GROUP_SIZE: 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<size_t>(buf, size, size_ret, 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->max_block_size()); 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return vector_property<size_t>(buf, size, size_ret, 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->block_size()); 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_LOCAL_MEM_SIZE: 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<cl_ulong>(buf, size, size_ret, 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->mem_local()); 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<size_t>(buf, size, size_ret, 1); 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case CL_KERNEL_PRIVATE_MEM_SIZE: 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return scalar_property<cl_ulong>(buf, size, size_ret, 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->mem_private()); 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_VALUE; 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgnamespace { 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /// 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /// Common argument checking shared by kernel invocation commands. 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /// 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kernel_validate(cl_command_queue q, cl_kernel kern, 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_uint dims, const size_t *grid_offset, 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const size_t *grid_size, const size_t *block_size, 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_uint num_deps, const cl_event *deps, 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_event *ev) { 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!q) 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_COMMAND_QUEUE); 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!kern) 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_KERNEL); 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (&kern->prog.ctx != &q->ctx || 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org any_of([&](const cl_event ev) { 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return &ev->ctx != &q->ctx; 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }, deps, deps + num_deps)) 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_CONTEXT); 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (bool(num_deps) != bool(deps) || 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org any_of(is_zero<cl_event>(), deps, deps + num_deps)) 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_EVENT_WAIT_LIST); 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (any_of([](std::unique_ptr<kernel::argument> &arg) { 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return !arg->set(); 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }, kern->args.begin(), kern->args.end())) 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_KERNEL_ARGS); 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!kern->prog.binaries().count(&q->dev)) 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_PROGRAM_EXECUTABLE); 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (dims < 1 || dims > q->dev.max_block_size().size()) 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_WORK_DIMENSION); 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (!grid_size || any_of(is_zero<size_t>(), grid_size, grid_size + dims)) 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_GLOBAL_WORK_SIZE); 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (block_size) { 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (any_of([](size_t b, size_t max) { 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return b == 0 || b > max; 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }, block_size, block_size + dims, 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org q->dev.max_block_size().begin())) 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_WORK_ITEM_SIZE); 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (any_of([](size_t b, size_t g) { 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return g % b; 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }, block_size, block_size + dims, grid_size)) 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_WORK_GROUP_SIZE); 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (fold(std::multiplies<size_t>(), 1u, 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org block_size, block_size + dims) > 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org q->dev.max_threads_per_block()) 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org throw error(CL_INVALID_WORK_GROUP_SIZE); 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /// 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /// Common event action shared by kernel invocation commands. 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org /// 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::function<void (event &)> 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kernel_op(cl_command_queue q, cl_kernel kern, 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> &grid_offset, 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> &grid_size, 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> &block_size) { 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> reduced_grid_size = map( 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::divides<size_t>(), grid_size.begin(), grid_size.end(), 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org block_size.begin()); 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return [=](event &) { 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kern->launch(*q, grid_offset, reduced_grid_size, block_size); 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org }; 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org template<typename T, typename S> 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org std::vector<T> 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org opt_vector(const T *p, S n) { 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (p) 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return { p, p + n }; 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org else 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return { n }; 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclEnqueueNDRangeKernel(cl_command_queue q, cl_kernel kern, 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_uint dims, const size_t *pgrid_offset, 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const size_t *pgrid_size, const size_t *pblock_size, 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_uint num_deps, const cl_event *deps, 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_event *ev) try { 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> grid_offset = opt_vector(pgrid_offset, dims); 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> grid_size = opt_vector(pgrid_size, dims); 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> block_size = opt_vector(pblock_size, dims); 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kernel_validate(q, kern, dims, pgrid_offset, pgrid_size, pblock_size, 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org num_deps, deps, ev); 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hard_event *hev = new hard_event( 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *q, CL_COMMAND_NDRANGE_KERNEL, { deps, deps + num_deps }, 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kernel_op(q, kern, grid_offset, grid_size, block_size)); 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_object(ev, hev); 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_SUCCESS; 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} catch(error &e) { 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return e.get(); 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclEnqueueTask(cl_command_queue q, cl_kernel kern, 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_uint num_deps, const cl_event *deps, 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_event *ev) try { 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> grid_offset = { 0 }; 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> grid_size = { 1 }; 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const std::vector<size_t> block_size = { 1 }; 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kernel_validate(q, kern, 1, grid_offset.data(), grid_size.data(), 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org block_size.data(), num_deps, deps, ev); 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org hard_event *hev = new hard_event( 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org *q, CL_COMMAND_TASK, { deps, deps + num_deps }, 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org kernel_op(q, kern, grid_offset, grid_size, block_size)); 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ret_object(ev, hev); 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_SUCCESS; 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} catch(error &e) { 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return e.get(); 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgPUBLIC cl_int 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgclEnqueueNativeKernel(cl_command_queue q, void (*func)(void *), 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org void *args, size_t args_size, 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cl_uint obj_count, const cl_mem *obj_list, 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const void **obj_args, cl_uint num_deps, 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const cl_event *deps, cl_event *ev) { 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CL_INVALID_OPERATION; 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 326