1// 2// Copyright 2012 Francisco Jerez 3// 4// Permission is hereby granted, free of charge, to any person obtaining a 5// copy of this software and associated documentation files (the "Software"), 6// to deal in the Software without restriction, including without limitation 7// the rights to use, copy, modify, merge, publish, distribute, sublicense, 8// and/or sell copies of the Software, and to permit persons to whom the 9// Software is furnished to do so, subject to the following conditions: 10// 11// The above copyright notice and this permission notice shall be included in 12// all copies or substantial portions of the Software. 13// 14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20// SOFTWARE. 21// 22 23#include <cstring> 24 25#include "api/util.hpp" 26#include "core/event.hpp" 27#include "core/resource.hpp" 28 29using namespace clover; 30 31namespace { 32 typedef resource::point point; 33 34 /// 35 /// Common argument checking shared by memory transfer commands. 36 /// 37 void 38 validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) { 39 if (!q) 40 throw error(CL_INVALID_COMMAND_QUEUE); 41 42 if (bool(num_deps) != bool(deps) || 43 any_of(is_zero<cl_event>(), deps, deps + num_deps)) 44 throw error(CL_INVALID_EVENT_WAIT_LIST); 45 46 if (any_of([&](const cl_event ev) { 47 return &ev->ctx != &q->ctx; 48 }, deps, deps + num_deps)) 49 throw error(CL_INVALID_CONTEXT); 50 } 51 52 /// 53 /// Memory object-specific argument checking shared by most memory 54 /// transfer commands. 55 /// 56 void 57 validate_obj(cl_command_queue q, cl_mem obj) { 58 if (!obj) 59 throw error(CL_INVALID_MEM_OBJECT); 60 61 if (&obj->ctx != &q->ctx) 62 throw error(CL_INVALID_CONTEXT); 63 } 64 65 /// 66 /// Class that encapsulates the task of mapping an object of type 67 /// \a T. The return value of get() should be implicitly 68 /// convertible to \a void *. 69 /// 70 template<typename T> struct __map; 71 72 template<> struct __map<void *> { 73 static void * 74 get(cl_command_queue q, void *obj, cl_map_flags flags, 75 size_t offset, size_t size) { 76 return (char *)obj + offset; 77 } 78 }; 79 80 template<> struct __map<const void *> { 81 static const void * 82 get(cl_command_queue q, const void *obj, cl_map_flags flags, 83 size_t offset, size_t size) { 84 return (const char *)obj + offset; 85 } 86 }; 87 88 template<> struct __map<memory_obj *> { 89 static mapping 90 get(cl_command_queue q, memory_obj *obj, cl_map_flags flags, 91 size_t offset, size_t size) { 92 return { *q, obj->resource(q), flags, true, { offset }, { size }}; 93 } 94 }; 95 96 /// 97 /// Software copy from \a src_obj to \a dst_obj. They can be 98 /// either pointers or memory objects. 99 /// 100 template<typename T, typename S> 101 std::function<void (event &)> 102 soft_copy_op(cl_command_queue q, 103 T dst_obj, const point &dst_orig, const point &dst_pitch, 104 S src_obj, const point &src_orig, const point &src_pitch, 105 const point ®ion) { 106 return [=](event &) { 107 auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE, 108 dst_pitch(dst_orig), dst_pitch(region)); 109 auto src = __map<S>::get(q, src_obj, CL_MAP_READ, 110 src_pitch(src_orig), src_pitch(region)); 111 point p; 112 113 for (p[2] = 0; p[2] < region[2]; ++p[2]) { 114 for (p[1] = 0; p[1] < region[1]; ++p[1]) { 115 std::memcpy(static_cast<char *>(dst) + dst_pitch(p), 116 static_cast<const char *>(src) + src_pitch(p), 117 src_pitch[0] * region[0]); 118 } 119 } 120 }; 121 } 122 123 /// 124 /// Hardware copy from \a src_obj to \a dst_obj. 125 /// 126 template<typename T, typename S> 127 std::function<void (event &)> 128 hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig, 129 S src_obj, const point &src_orig, const point ®ion) { 130 return [=](event &) { 131 dst_obj->resource(q).copy(*q, dst_orig, region, 132 src_obj->resource(q), src_orig); 133 }; 134 } 135} 136 137PUBLIC cl_int 138clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, 139 size_t offset, size_t size, void *ptr, 140 cl_uint num_deps, const cl_event *deps, 141 cl_event *ev) try { 142 validate_base(q, num_deps, deps); 143 validate_obj(q, obj); 144 145 if (!ptr || offset > obj->size() || offset + size > obj->size()) 146 throw error(CL_INVALID_VALUE); 147 148 hard_event *hev = new hard_event( 149 *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps }, 150 soft_copy_op(q, 151 ptr, { 0 }, { 1 }, 152 obj, { offset }, { 1 }, 153 { size, 1, 1 })); 154 155 ret_object(ev, hev); 156 return CL_SUCCESS; 157 158} catch (error &e) { 159 return e.get(); 160} 161 162PUBLIC cl_int 163clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, 164 size_t offset, size_t size, const void *ptr, 165 cl_uint num_deps, const cl_event *deps, 166 cl_event *ev) try { 167 validate_base(q, num_deps, deps); 168 validate_obj(q, obj); 169 170 if (!ptr || offset > obj->size() || offset + size > obj->size()) 171 throw error(CL_INVALID_VALUE); 172 173 hard_event *hev = new hard_event( 174 *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps }, 175 soft_copy_op(q, 176 obj, { offset }, { 1 }, 177 ptr, { 0 }, { 1 }, 178 { size, 1, 1 })); 179 180 ret_object(ev, hev); 181 return CL_SUCCESS; 182 183} catch (error &e) { 184 return e.get(); 185} 186 187PUBLIC cl_int 188clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, 189 const size_t *obj_origin, const size_t *host_origin, 190 const size_t *region, 191 size_t obj_row_pitch, size_t obj_slice_pitch, 192 size_t host_row_pitch, size_t host_slice_pitch, 193 void *ptr, 194 cl_uint num_deps, const cl_event *deps, 195 cl_event *ev) try { 196 validate_base(q, num_deps, deps); 197 validate_obj(q, obj); 198 199 if (!ptr) 200 throw error(CL_INVALID_VALUE); 201 202 hard_event *hev = new hard_event( 203 *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps }, 204 soft_copy_op(q, 205 ptr, host_origin, 206 { 1, host_row_pitch, host_slice_pitch }, 207 obj, obj_origin, 208 { 1, obj_row_pitch, obj_slice_pitch }, 209 region)); 210 211 ret_object(ev, hev); 212 return CL_SUCCESS; 213 214} catch (error &e) { 215 return e.get(); 216} 217 218PUBLIC cl_int 219clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking, 220 const size_t *obj_origin, const size_t *host_origin, 221 const size_t *region, 222 size_t obj_row_pitch, size_t obj_slice_pitch, 223 size_t host_row_pitch, size_t host_slice_pitch, 224 const void *ptr, 225 cl_uint num_deps, const cl_event *deps, 226 cl_event *ev) try { 227 validate_base(q, num_deps, deps); 228 validate_obj(q, obj); 229 230 if (!ptr) 231 throw error(CL_INVALID_VALUE); 232 233 hard_event *hev = new hard_event( 234 *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps }, 235 soft_copy_op(q, 236 obj, obj_origin, 237 { 1, obj_row_pitch, obj_slice_pitch }, 238 ptr, host_origin, 239 { 1, host_row_pitch, host_slice_pitch }, 240 region)); 241 242 ret_object(ev, hev); 243 return CL_SUCCESS; 244 245} catch (error &e) { 246 return e.get(); 247} 248 249PUBLIC cl_int 250clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, 251 size_t src_offset, size_t dst_offset, size_t size, 252 cl_uint num_deps, const cl_event *deps, 253 cl_event *ev) try { 254 validate_base(q, num_deps, deps); 255 validate_obj(q, src_obj); 256 validate_obj(q, dst_obj); 257 258 hard_event *hev = new hard_event( 259 *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps }, 260 hard_copy_op(q, dst_obj, { dst_offset }, 261 src_obj, { src_offset }, 262 { size, 1, 1 })); 263 264 ret_object(ev, hev); 265 return CL_SUCCESS; 266 267} catch (error &e) { 268 return e.get(); 269} 270 271PUBLIC cl_int 272clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, 273 const size_t *src_origin, const size_t *dst_origin, 274 const size_t *region, 275 size_t src_row_pitch, size_t src_slice_pitch, 276 size_t dst_row_pitch, size_t dst_slice_pitch, 277 cl_uint num_deps, const cl_event *deps, 278 cl_event *ev) try { 279 validate_base(q, num_deps, deps); 280 validate_obj(q, src_obj); 281 validate_obj(q, dst_obj); 282 283 hard_event *hev = new hard_event( 284 *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps }, 285 soft_copy_op(q, 286 dst_obj, dst_origin, 287 { 1, dst_row_pitch, dst_slice_pitch }, 288 src_obj, src_origin, 289 { 1, src_row_pitch, src_slice_pitch }, 290 region)); 291 292 ret_object(ev, hev); 293 return CL_SUCCESS; 294 295} catch (error &e) { 296 return e.get(); 297} 298 299PUBLIC cl_int 300clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking, 301 const size_t *origin, const size_t *region, 302 size_t row_pitch, size_t slice_pitch, void *ptr, 303 cl_uint num_deps, const cl_event *deps, 304 cl_event *ev) try { 305 image *img = dynamic_cast<image *>(obj); 306 307 validate_base(q, num_deps, deps); 308 validate_obj(q, img); 309 310 if (!ptr) 311 throw error(CL_INVALID_VALUE); 312 313 hard_event *hev = new hard_event( 314 *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps }, 315 soft_copy_op(q, 316 ptr, {}, 317 { 1, row_pitch, slice_pitch }, 318 obj, origin, 319 { 1, img->row_pitch(), img->slice_pitch() }, 320 region)); 321 322 ret_object(ev, hev); 323 return CL_SUCCESS; 324 325} catch (error &e) { 326 return e.get(); 327} 328 329PUBLIC cl_int 330clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking, 331 const size_t *origin, const size_t *region, 332 size_t row_pitch, size_t slice_pitch, const void *ptr, 333 cl_uint num_deps, const cl_event *deps, 334 cl_event *ev) try { 335 image *img = dynamic_cast<image *>(obj); 336 337 validate_base(q, num_deps, deps); 338 validate_obj(q, img); 339 340 if (!ptr) 341 throw error(CL_INVALID_VALUE); 342 343 hard_event *hev = new hard_event( 344 *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps }, 345 soft_copy_op(q, 346 obj, origin, 347 { 1, img->row_pitch(), img->slice_pitch() }, 348 ptr, {}, 349 { 1, row_pitch, slice_pitch }, 350 region)); 351 352 ret_object(ev, hev); 353 return CL_SUCCESS; 354 355} catch (error &e) { 356 return e.get(); 357} 358 359PUBLIC cl_int 360clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, 361 const size_t *src_origin, const size_t *dst_origin, 362 const size_t *region, 363 cl_uint num_deps, const cl_event *deps, 364 cl_event *ev) try { 365 image *src_img = dynamic_cast<image *>(src_obj); 366 image *dst_img = dynamic_cast<image *>(dst_obj); 367 368 validate_base(q, num_deps, deps); 369 validate_obj(q, src_img); 370 validate_obj(q, dst_img); 371 372 hard_event *hev = new hard_event( 373 *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps }, 374 hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region)); 375 376 ret_object(ev, hev); 377 return CL_SUCCESS; 378 379} catch (error &e) { 380 return e.get(); 381} 382 383PUBLIC cl_int 384clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, 385 const size_t *src_origin, const size_t *region, 386 size_t dst_offset, 387 cl_uint num_deps, const cl_event *deps, 388 cl_event *ev) try { 389 image *src_img = dynamic_cast<image *>(src_obj); 390 391 validate_base(q, num_deps, deps); 392 validate_obj(q, src_img); 393 validate_obj(q, dst_obj); 394 395 hard_event *hev = new hard_event( 396 *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps }, 397 soft_copy_op(q, 398 dst_obj, { dst_offset }, 399 { 0, 0, 0 }, 400 src_obj, src_origin, 401 { 1, src_img->row_pitch(), src_img->slice_pitch() }, 402 region)); 403 404 ret_object(ev, hev); 405 return CL_SUCCESS; 406 407} catch (error &e) { 408 return e.get(); 409} 410 411PUBLIC cl_int 412clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj, 413 size_t src_offset, 414 const size_t *dst_origin, const size_t *region, 415 cl_uint num_deps, const cl_event *deps, 416 cl_event *ev) try { 417 image *dst_img = dynamic_cast<image *>(src_obj); 418 419 validate_base(q, num_deps, deps); 420 validate_obj(q, src_obj); 421 validate_obj(q, dst_img); 422 423 hard_event *hev = new hard_event( 424 *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps }, 425 soft_copy_op(q, 426 dst_obj, dst_origin, 427 { 1, dst_img->row_pitch(), dst_img->slice_pitch() }, 428 src_obj, { src_offset }, 429 { 0, 0, 0 }, 430 region)); 431 432 ret_object(ev, hev); 433 return CL_SUCCESS; 434 435} catch (error &e) { 436 return e.get(); 437} 438 439PUBLIC void * 440clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking, 441 cl_map_flags flags, size_t offset, size_t size, 442 cl_uint num_deps, const cl_event *deps, 443 cl_event *ev, cl_int *errcode_ret) try { 444 validate_base(q, num_deps, deps); 445 validate_obj(q, obj); 446 447 if (offset > obj->size() || offset + size > obj->size()) 448 throw error(CL_INVALID_VALUE); 449 450 void *map = obj->resource(q).add_map( 451 *q, flags, blocking, { offset }, { size }); 452 453 ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER, 454 { deps, deps + num_deps })); 455 ret_error(errcode_ret, CL_SUCCESS); 456 return map; 457 458} catch (error &e) { 459 ret_error(errcode_ret, e); 460 return NULL; 461} 462 463PUBLIC void * 464clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking, 465 cl_map_flags flags, 466 const size_t *origin, const size_t *region, 467 size_t *row_pitch, size_t *slice_pitch, 468 cl_uint num_deps, const cl_event *deps, 469 cl_event *ev, cl_int *errcode_ret) try { 470 image *img = dynamic_cast<image *>(obj); 471 472 validate_base(q, num_deps, deps); 473 validate_obj(q, img); 474 475 void *map = obj->resource(q).add_map( 476 *q, flags, blocking, origin, region); 477 478 ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE, 479 { deps, deps + num_deps })); 480 ret_error(errcode_ret, CL_SUCCESS); 481 return map; 482 483} catch (error &e) { 484 ret_error(errcode_ret, e); 485 return NULL; 486} 487 488PUBLIC cl_int 489clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr, 490 cl_uint num_deps, const cl_event *deps, 491 cl_event *ev) try { 492 validate_base(q, num_deps, deps); 493 validate_obj(q, obj); 494 495 hard_event *hev = new hard_event( 496 *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps }, 497 [=](event &) { 498 obj->resource(q).del_map(ptr); 499 }); 500 501 ret_object(ev, hev); 502 return CL_SUCCESS; 503 504} catch (error &e) { 505 return e.get(); 506} 507