transfer.cpp revision 6230f77232a4703c9f98f569104ac03430a5fb95
1// 2// Copyright 2012 Francisco Jerez 3// 4// Permission is hereby granted, free of charge, to any person obtaining a 5// copy of this software and associated documentation files (the "Software"), 6// to deal in the Software without restriction, including without limitation 7// the rights to use, copy, modify, merge, publish, distribute, sublicense, 8// and/or sell copies of the Software, and to permit persons to whom the 9// Software is furnished to do so, subject to the following conditions: 10// 11// The above copyright notice and this permission notice shall be included in 12// all copies or substantial portions of the Software. 13// 14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20// OTHER DEALINGS IN THE SOFTWARE. 21// 22 23#include <cstring> 24 25#include "api/util.hpp" 26#include "core/event.hpp" 27#include "core/resource.hpp" 28 29using namespace clover; 30 31namespace { 32 typedef resource::vector vector_t; 33 34 vector_t 35 vector(const size_t *p) { 36 return range(p, 3); 37 } 38 39 /// 40 /// Common argument checking shared by memory transfer commands. 41 /// 42 void 43 validate_common(command_queue &q, 44 std::initializer_list<std::reference_wrapper<memory_obj>> mems, 45 const ref_vector<event> &deps) { 46 if (any_of([&](const event &ev) { 47 return &ev.ctx != &q.ctx; 48 }, deps)) 49 throw error(CL_INVALID_CONTEXT); 50 51 if (any_of([&](const memory_obj &mem) { 52 return &mem.ctx != &q.ctx; 53 }, mems)) 54 throw error(CL_INVALID_CONTEXT); 55 } 56 57 /// 58 /// Class that encapsulates the task of mapping an object of type 59 /// \a T. The return value of get() should be implicitly 60 /// convertible to \a void *. 61 /// 62 template<typename T> 63 struct _map { 64 static mapping 65 get(command_queue &q, T obj, cl_map_flags flags, 66 size_t offset, size_t size) { 67 return { q, obj->resource(q), flags, true, 68 {{ offset }}, {{ size, 1, 1 }} }; 69 } 70 }; 71 72 template<> 73 struct _map<void *> { 74 static void * 75 get(command_queue &q, void *obj, cl_map_flags flags, 76 size_t offset, size_t size) { 77 return (char *)obj + offset; 78 } 79 }; 80 81 template<> 82 struct _map<const void *> { 83 static const void * 84 get(command_queue &q, const void *obj, cl_map_flags flags, 85 size_t offset, size_t size) { 86 return (const char *)obj + offset; 87 } 88 }; 89 90 /// 91 /// Software copy from \a src_obj to \a dst_obj. They can be 92 /// either pointers or memory objects. 93 /// 94 template<typename T, typename S> 95 std::function<void (event &)> 96 soft_copy_op(command_queue &q, 97 T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch, 98 S src_obj, const vector_t &src_orig, const vector_t &src_pitch, 99 const vector_t ®ion) { 100 return [=, &q](event &) { 101 auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE, 102 dot(dst_pitch, dst_orig), 103 dst_pitch[2] * region[2]); 104 auto src = _map<S>::get(q, src_obj, CL_MAP_READ, 105 dot(src_pitch, src_orig), 106 src_pitch[2] * region[2]); 107 vector_t v = {}; 108 109 for (v[2] = 0; v[2] < region[2]; ++v[2]) { 110 for (v[1] = 0; v[1] < region[1]; ++v[1]) { 111 std::memcpy( 112 static_cast<char *>(dst) + dot(dst_pitch, v), 113 static_cast<const char *>(src) + dot(src_pitch, v), 114 src_pitch[0] * region[0]); 115 } 116 } 117 }; 118 } 119 120 /// 121 /// Hardware copy from \a src_obj to \a dst_obj. 122 /// 123 template<typename T, typename S> 124 std::function<void (event &)> 125 hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig, 126 S src_obj, const vector_t &src_orig, const vector_t ®ion) { 127 return [=, &q](event &) { 128 dst_obj->resource(q).copy(q, dst_orig, region, 129 src_obj->resource(q), src_orig); 130 }; 131 } 132} 133 134CLOVER_API cl_int 135clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 136 size_t offset, size_t size, void *ptr, 137 cl_uint num_deps, const cl_event *d_deps, 138 cl_event *rd_ev) try { 139 auto &q = obj(d_q); 140 auto &mem = obj(d_mem); 141 auto deps = objs<wait_list_tag>(d_deps, num_deps); 142 143 validate_common(q, { mem }, deps); 144 145 if (!ptr || offset > mem.size() || offset + size > mem.size()) 146 throw error(CL_INVALID_VALUE); 147 148 hard_event *hev = new hard_event( 149 q, CL_COMMAND_READ_BUFFER, deps, 150 soft_copy_op(q, 151 ptr, {{ 0 }}, {{ 1 }}, 152 &mem, {{ offset }}, {{ 1 }}, 153 {{ size, 1, 1 }})); 154 155 ret_object(rd_ev, hev); 156 return CL_SUCCESS; 157 158} catch (error &e) { 159 return e.get(); 160} 161 162CLOVER_API cl_int 163clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 164 size_t offset, size_t size, const void *ptr, 165 cl_uint num_deps, const cl_event *d_deps, 166 cl_event *rd_ev) try { 167 auto &q = obj(d_q); 168 auto &mem = obj(d_mem); 169 auto deps = objs<wait_list_tag>(d_deps, num_deps); 170 171 validate_common(q, { mem }, deps); 172 173 if (!ptr || offset > mem.size() || offset + size > mem.size()) 174 throw error(CL_INVALID_VALUE); 175 176 hard_event *hev = new hard_event( 177 q, CL_COMMAND_WRITE_BUFFER, deps, 178 soft_copy_op(q, 179 &mem, {{ offset }}, {{ 1 }}, 180 ptr, {{ 0 }}, {{ 1 }}, 181 {{ size, 1, 1 }})); 182 183 ret_object(rd_ev, hev); 184 return CL_SUCCESS; 185 186} catch (error &e) { 187 return e.get(); 188} 189 190CLOVER_API cl_int 191clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 192 const size_t *obj_origin, 193 const size_t *host_origin, 194 const size_t *region, 195 size_t obj_row_pitch, size_t obj_slice_pitch, 196 size_t host_row_pitch, size_t host_slice_pitch, 197 void *ptr, 198 cl_uint num_deps, const cl_event *d_deps, 199 cl_event *rd_ev) try { 200 auto &q = obj(d_q); 201 auto &mem = obj(d_mem); 202 auto deps = objs<wait_list_tag>(d_deps, num_deps); 203 204 validate_common(q, { mem }, deps); 205 206 if (!ptr) 207 throw error(CL_INVALID_VALUE); 208 209 hard_event *hev = new hard_event( 210 q, CL_COMMAND_READ_BUFFER_RECT, deps, 211 soft_copy_op(q, 212 ptr, vector(host_origin), 213 {{ 1, host_row_pitch, host_slice_pitch }}, 214 &mem, vector(obj_origin), 215 {{ 1, obj_row_pitch, obj_slice_pitch }}, 216 vector(region))); 217 218 ret_object(rd_ev, hev); 219 return CL_SUCCESS; 220 221} catch (error &e) { 222 return e.get(); 223} 224 225CLOVER_API cl_int 226clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 227 const size_t *obj_origin, 228 const size_t *host_origin, 229 const size_t *region, 230 size_t obj_row_pitch, size_t obj_slice_pitch, 231 size_t host_row_pitch, size_t host_slice_pitch, 232 const void *ptr, 233 cl_uint num_deps, const cl_event *d_deps, 234 cl_event *rd_ev) try { 235 auto &q = obj(d_q); 236 auto &mem = obj(d_mem); 237 auto deps = objs<wait_list_tag>(d_deps, num_deps); 238 239 validate_common(q, { mem }, deps); 240 241 if (!ptr) 242 throw error(CL_INVALID_VALUE); 243 244 hard_event *hev = new hard_event( 245 q, CL_COMMAND_WRITE_BUFFER_RECT, deps, 246 soft_copy_op(q, 247 &mem, vector(obj_origin), 248 {{ 1, obj_row_pitch, obj_slice_pitch }}, 249 ptr, vector(host_origin), 250 {{ 1, host_row_pitch, host_slice_pitch }}, 251 vector(region))); 252 253 ret_object(rd_ev, hev); 254 return CL_SUCCESS; 255 256} catch (error &e) { 257 return e.get(); 258} 259 260CLOVER_API cl_int 261clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem, 262 size_t src_offset, size_t dst_offset, size_t size, 263 cl_uint num_deps, const cl_event *d_deps, 264 cl_event *rd_ev) try { 265 auto &q = obj(d_q); 266 auto &src_mem = obj(d_src_mem); 267 auto &dst_mem = obj(d_dst_mem); 268 auto deps = objs<wait_list_tag>(d_deps, num_deps); 269 270 validate_common(q, { src_mem, dst_mem }, deps); 271 272 hard_event *hev = new hard_event( 273 q, CL_COMMAND_COPY_BUFFER, deps, 274 hard_copy_op(q, &dst_mem, {{ dst_offset }}, 275 &src_mem, {{ src_offset }}, 276 {{ size, 1, 1 }})); 277 278 ret_object(rd_ev, hev); 279 return CL_SUCCESS; 280 281} catch (error &e) { 282 return e.get(); 283} 284 285CLOVER_API cl_int 286clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem, 287 cl_mem d_dst_mem, 288 const size_t *src_origin, const size_t *dst_origin, 289 const size_t *region, 290 size_t src_row_pitch, size_t src_slice_pitch, 291 size_t dst_row_pitch, size_t dst_slice_pitch, 292 cl_uint num_deps, const cl_event *d_deps, 293 cl_event *rd_ev) try { 294 auto &q = obj(d_q); 295 auto &src_mem = obj(d_src_mem); 296 auto &dst_mem = obj(d_dst_mem); 297 auto deps = objs<wait_list_tag>(d_deps, num_deps); 298 299 validate_common(q, { src_mem, dst_mem }, deps); 300 301 hard_event *hev = new hard_event( 302 q, CL_COMMAND_COPY_BUFFER_RECT, deps, 303 soft_copy_op(q, 304 &dst_mem, vector(dst_origin), 305 {{ 1, dst_row_pitch, dst_slice_pitch }}, 306 &src_mem, vector(src_origin), 307 {{ 1, src_row_pitch, src_slice_pitch }}, 308 vector(region))); 309 310 ret_object(rd_ev, hev); 311 return CL_SUCCESS; 312 313} catch (error &e) { 314 return e.get(); 315} 316 317CLOVER_API cl_int 318clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 319 const size_t *origin, const size_t *region, 320 size_t row_pitch, size_t slice_pitch, void *ptr, 321 cl_uint num_deps, const cl_event *d_deps, 322 cl_event *rd_ev) try { 323 auto &q = obj(d_q); 324 auto &img = obj<image>(d_mem); 325 auto deps = objs<wait_list_tag>(d_deps, num_deps); 326 327 validate_common(q, { img }, deps); 328 329 if (!ptr) 330 throw error(CL_INVALID_VALUE); 331 332 hard_event *hev = new hard_event( 333 q, CL_COMMAND_READ_IMAGE, deps, 334 soft_copy_op(q, 335 ptr, {}, 336 {{ 1, row_pitch, slice_pitch }}, 337 &img, vector(origin), 338 {{ 1, img.row_pitch(), img.slice_pitch() }}, 339 vector(region))); 340 341 ret_object(rd_ev, hev); 342 return CL_SUCCESS; 343 344} catch (error &e) { 345 return e.get(); 346} 347 348CLOVER_API cl_int 349clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 350 const size_t *origin, const size_t *region, 351 size_t row_pitch, size_t slice_pitch, const void *ptr, 352 cl_uint num_deps, const cl_event *d_deps, 353 cl_event *rd_ev) try { 354 auto &q = obj(d_q); 355 auto &img = obj<image>(d_mem); 356 auto deps = objs<wait_list_tag>(d_deps, num_deps); 357 358 validate_common(q, { img }, deps); 359 360 if (!ptr) 361 throw error(CL_INVALID_VALUE); 362 363 hard_event *hev = new hard_event( 364 q, CL_COMMAND_WRITE_IMAGE, deps, 365 soft_copy_op(q, 366 &img, vector(origin), 367 {{ 1, img.row_pitch(), img.slice_pitch() }}, 368 ptr, {}, 369 {{ 1, row_pitch, slice_pitch }}, 370 vector(region))); 371 372 ret_object(rd_ev, hev); 373 return CL_SUCCESS; 374 375} catch (error &e) { 376 return e.get(); 377} 378 379CLOVER_API cl_int 380clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem, 381 const size_t *src_origin, const size_t *dst_origin, 382 const size_t *region, 383 cl_uint num_deps, const cl_event *d_deps, 384 cl_event *rd_ev) try { 385 auto &q = obj(d_q); 386 auto &src_img = obj<image>(d_src_mem); 387 auto &dst_img = obj<image>(d_dst_mem); 388 auto deps = objs<wait_list_tag>(d_deps, num_deps); 389 390 validate_common(q, { src_img, dst_img }, deps); 391 392 hard_event *hev = new hard_event( 393 q, CL_COMMAND_COPY_IMAGE, deps, 394 hard_copy_op(q, 395 &dst_img, vector(dst_origin), 396 &src_img, vector(src_origin), 397 vector(region))); 398 399 ret_object(rd_ev, hev); 400 return CL_SUCCESS; 401 402} catch (error &e) { 403 return e.get(); 404} 405 406CLOVER_API cl_int 407clEnqueueCopyImageToBuffer(cl_command_queue d_q, 408 cl_mem d_src_mem, cl_mem d_dst_mem, 409 const size_t *src_origin, const size_t *region, 410 size_t dst_offset, 411 cl_uint num_deps, const cl_event *d_deps, 412 cl_event *rd_ev) try { 413 auto &q = obj(d_q); 414 auto &src_img = obj<image>(d_src_mem); 415 auto &dst_mem = obj(d_dst_mem); 416 auto deps = objs<wait_list_tag>(d_deps, num_deps); 417 418 validate_common(q, { src_img, dst_mem }, deps); 419 420 hard_event *hev = new hard_event( 421 q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps, 422 soft_copy_op(q, 423 &dst_mem, {{ dst_offset }}, 424 {{ 0, 0, 0 }}, 425 &src_img, vector(src_origin), 426 {{ 1, src_img.row_pitch(), src_img.slice_pitch() }}, 427 vector(region))); 428 429 ret_object(rd_ev, hev); 430 return CL_SUCCESS; 431 432} catch (error &e) { 433 return e.get(); 434} 435 436CLOVER_API cl_int 437clEnqueueCopyBufferToImage(cl_command_queue d_q, 438 cl_mem d_src_mem, cl_mem d_dst_mem, 439 size_t src_offset, 440 const size_t *dst_origin, const size_t *region, 441 cl_uint num_deps, const cl_event *d_deps, 442 cl_event *rd_ev) try { 443 auto &q = obj(d_q); 444 auto &src_mem = obj(d_src_mem); 445 auto &dst_img = obj<image>(d_dst_mem); 446 auto deps = objs<wait_list_tag>(d_deps, num_deps); 447 448 validate_common(q, { src_mem, dst_img }, deps); 449 450 hard_event *hev = new hard_event( 451 q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps, 452 soft_copy_op(q, 453 &dst_img, vector(dst_origin), 454 {{ 1, dst_img.row_pitch(), dst_img.slice_pitch() }}, 455 &src_mem, {{ src_offset }}, 456 {{ 0, 0, 0 }}, 457 vector(region))); 458 459 ret_object(rd_ev, hev); 460 return CL_SUCCESS; 461 462} catch (error &e) { 463 return e.get(); 464} 465 466CLOVER_API void * 467clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 468 cl_map_flags flags, size_t offset, size_t size, 469 cl_uint num_deps, const cl_event *d_deps, 470 cl_event *rd_ev, cl_int *r_errcode) try { 471 auto &q = obj(d_q); 472 auto &mem = obj(d_mem); 473 auto deps = objs<wait_list_tag>(d_deps, num_deps); 474 475 validate_common(q, { mem }, deps); 476 477 if (offset > mem.size() || offset + size > mem.size()) 478 throw error(CL_INVALID_VALUE); 479 480 void *map = mem.resource(q).add_map( 481 q, flags, blocking, {{ offset }}, {{ size }}); 482 483 ret_object(rd_ev, new hard_event(q, CL_COMMAND_MAP_BUFFER, deps)); 484 ret_error(r_errcode, CL_SUCCESS); 485 return map; 486 487} catch (error &e) { 488 ret_error(r_errcode, e); 489 return NULL; 490} 491 492CLOVER_API void * 493clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking, 494 cl_map_flags flags, 495 const size_t *origin, const size_t *region, 496 size_t *row_pitch, size_t *slice_pitch, 497 cl_uint num_deps, const cl_event *d_deps, 498 cl_event *rd_ev, cl_int *r_errcode) try { 499 auto &q = obj(d_q); 500 auto &img = obj<image>(d_mem); 501 auto deps = objs<wait_list_tag>(d_deps, num_deps); 502 503 validate_common(q, { img }, deps); 504 505 void *map = img.resource(q).add_map( 506 q, flags, blocking, vector(origin), vector(region)); 507 508 ret_object(rd_ev, new hard_event(q, CL_COMMAND_MAP_IMAGE, deps)); 509 ret_error(r_errcode, CL_SUCCESS); 510 return map; 511 512} catch (error &e) { 513 ret_error(r_errcode, e); 514 return NULL; 515} 516 517CLOVER_API cl_int 518clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr, 519 cl_uint num_deps, const cl_event *d_deps, 520 cl_event *rd_ev) try { 521 auto &q = obj(d_q); 522 auto &mem = obj(d_mem); 523 auto deps = objs<wait_list_tag>(d_deps, num_deps); 524 525 validate_common(q, { mem }, deps); 526 527 hard_event *hev = new hard_event( 528 q, CL_COMMAND_UNMAP_MEM_OBJECT, deps, 529 [=, &q, &mem](event &) { 530 mem.resource(q).del_map(ptr); 531 }); 532 533 ret_object(rd_ev, hev); 534 return CL_SUCCESS; 535 536} catch (error &e) { 537 return e.get(); 538} 539