transfer.cpp revision 6230f77232a4703c9f98f569104ac03430a5fb95
//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
22
23#include <cstring>
24
25#include "api/util.hpp"
26#include "core/event.hpp"
27#include "core/resource.hpp"
28
29using namespace clover;
30
31namespace {
32   typedef resource::vector vector_t;
33
34   vector_t
35   vector(const size_t *p) {
36      return range(p, 3);
37   }
38
39   ///
40   /// Common argument checking shared by memory transfer commands.
41   ///
42   void
43   validate_common(command_queue &q,
44                   std::initializer_list<std::reference_wrapper<memory_obj>> mems,
45                   const ref_vector<event> &deps) {
46      if (any_of([&](const event &ev) {
47               return &ev.ctx != &q.ctx;
48            }, deps))
49         throw error(CL_INVALID_CONTEXT);
50
51      if (any_of([&](const memory_obj &mem) {
52               return &mem.ctx != &q.ctx;
53            }, mems))
54         throw error(CL_INVALID_CONTEXT);
55   }
56
57   ///
58   /// Class that encapsulates the task of mapping an object of type
59   /// \a T.  The return value of get() should be implicitly
60   /// convertible to \a void *.
61   ///
62   template<typename T>
63   struct _map {
64      static mapping
65      get(command_queue &q, T obj, cl_map_flags flags,
66          size_t offset, size_t size) {
67         return { q, obj->resource(q), flags, true,
68                  {{ offset }}, {{ size, 1, 1 }} };
69      }
70   };
71
72   template<>
73   struct _map<void *> {
74      static void *
75      get(command_queue &q, void *obj, cl_map_flags flags,
76          size_t offset, size_t size) {
77         return (char *)obj + offset;
78      }
79   };
80
81   template<>
82   struct _map<const void *> {
83      static const void *
84      get(command_queue &q, const void *obj, cl_map_flags flags,
85          size_t offset, size_t size) {
86         return (const char *)obj + offset;
87      }
88   };
89
90   ///
91   /// Software copy from \a src_obj to \a dst_obj.  They can be
92   /// either pointers or memory objects.
93   ///
94   template<typename T, typename S>
95   std::function<void (event &)>
96   soft_copy_op(command_queue &q,
97                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
98                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
99                const vector_t &region) {
100      return [=, &q](event &) {
101         auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
102                                 dot(dst_pitch, dst_orig),
103                                 dst_pitch[2] * region[2]);
104         auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
105                                 dot(src_pitch, src_orig),
106                                 src_pitch[2] * region[2]);
107         vector_t v = {};
108
109         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
110            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
111               std::memcpy(
112                  static_cast<char *>(dst) + dot(dst_pitch, v),
113                  static_cast<const char *>(src) + dot(src_pitch, v),
114                  src_pitch[0] * region[0]);
115            }
116         }
117      };
118   }
119
120   ///
121   /// Hardware copy from \a src_obj to \a dst_obj.
122   ///
123   template<typename T, typename S>
124   std::function<void (event &)>
125   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
126                S src_obj, const vector_t &src_orig, const vector_t &region) {
127      return [=, &q](event &) {
128         dst_obj->resource(q).copy(q, dst_orig, region,
129                                   src_obj->resource(q), src_orig);
130      };
131   }
132}
133
134CLOVER_API cl_int
135clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
136                    size_t offset, size_t size, void *ptr,
137                    cl_uint num_deps, const cl_event *d_deps,
138                    cl_event *rd_ev) try {
139   auto &q = obj(d_q);
140   auto &mem = obj(d_mem);
141   auto deps = objs<wait_list_tag>(d_deps, num_deps);
142
143   validate_common(q, { mem }, deps);
144
145   if (!ptr || offset > mem.size() || offset + size > mem.size())
146      throw error(CL_INVALID_VALUE);
147
148   hard_event *hev = new hard_event(
149      q, CL_COMMAND_READ_BUFFER, deps,
150      soft_copy_op(q,
151                   ptr, {{ 0 }}, {{ 1 }},
152                   &mem, {{ offset }}, {{ 1 }},
153                   {{ size, 1, 1 }}));
154
155   ret_object(rd_ev, hev);
156   return CL_SUCCESS;
157
158} catch (error &e) {
159   return e.get();
160}
161
162CLOVER_API cl_int
163clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
164                     size_t offset, size_t size, const void *ptr,
165                     cl_uint num_deps, const cl_event *d_deps,
166                     cl_event *rd_ev) try {
167   auto &q = obj(d_q);
168   auto &mem = obj(d_mem);
169   auto deps = objs<wait_list_tag>(d_deps, num_deps);
170
171   validate_common(q, { mem }, deps);
172
173   if (!ptr || offset > mem.size() || offset + size > mem.size())
174      throw error(CL_INVALID_VALUE);
175
176   hard_event *hev = new hard_event(
177      q, CL_COMMAND_WRITE_BUFFER, deps,
178      soft_copy_op(q,
179                   &mem, {{ offset }}, {{ 1 }},
180                   ptr, {{ 0 }}, {{ 1 }},
181                   {{ size, 1, 1 }}));
182
183   ret_object(rd_ev, hev);
184   return CL_SUCCESS;
185
186} catch (error &e) {
187   return e.get();
188}
189
190CLOVER_API cl_int
191clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
192                        const size_t *obj_origin,
193                        const size_t *host_origin,
194                        const size_t *region,
195                        size_t obj_row_pitch, size_t obj_slice_pitch,
196                        size_t host_row_pitch, size_t host_slice_pitch,
197                        void *ptr,
198                        cl_uint num_deps, const cl_event *d_deps,
199                        cl_event *rd_ev) try {
200   auto &q = obj(d_q);
201   auto &mem = obj(d_mem);
202   auto deps = objs<wait_list_tag>(d_deps, num_deps);
203
204   validate_common(q, { mem }, deps);
205
206   if (!ptr)
207      throw error(CL_INVALID_VALUE);
208
209   hard_event *hev = new hard_event(
210      q, CL_COMMAND_READ_BUFFER_RECT, deps,
211      soft_copy_op(q,
212                   ptr, vector(host_origin),
213                   {{ 1, host_row_pitch, host_slice_pitch }},
214                   &mem, vector(obj_origin),
215                   {{ 1, obj_row_pitch, obj_slice_pitch }},
216                   vector(region)));
217
218   ret_object(rd_ev, hev);
219   return CL_SUCCESS;
220
221} catch (error &e) {
222   return e.get();
223}
224
225CLOVER_API cl_int
226clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
227                         const size_t *obj_origin,
228                         const size_t *host_origin,
229                         const size_t *region,
230                         size_t obj_row_pitch, size_t obj_slice_pitch,
231                         size_t host_row_pitch, size_t host_slice_pitch,
232                         const void *ptr,
233                         cl_uint num_deps, const cl_event *d_deps,
234                         cl_event *rd_ev) try {
235   auto &q = obj(d_q);
236   auto &mem = obj(d_mem);
237   auto deps = objs<wait_list_tag>(d_deps, num_deps);
238
239   validate_common(q, { mem }, deps);
240
241   if (!ptr)
242      throw error(CL_INVALID_VALUE);
243
244   hard_event *hev = new hard_event(
245      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
246      soft_copy_op(q,
247                   &mem, vector(obj_origin),
248                   {{ 1, obj_row_pitch, obj_slice_pitch }},
249                   ptr, vector(host_origin),
250                   {{ 1, host_row_pitch, host_slice_pitch }},
251                   vector(region)));
252
253   ret_object(rd_ev, hev);
254   return CL_SUCCESS;
255
256} catch (error &e) {
257   return e.get();
258}
259
260CLOVER_API cl_int
261clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
262                    size_t src_offset, size_t dst_offset, size_t size,
263                    cl_uint num_deps, const cl_event *d_deps,
264                    cl_event *rd_ev) try {
265   auto &q = obj(d_q);
266   auto &src_mem = obj(d_src_mem);
267   auto &dst_mem = obj(d_dst_mem);
268   auto deps = objs<wait_list_tag>(d_deps, num_deps);
269
270   validate_common(q, { src_mem, dst_mem }, deps);
271
272   hard_event *hev = new hard_event(
273      q, CL_COMMAND_COPY_BUFFER, deps,
274      hard_copy_op(q, &dst_mem, {{ dst_offset }},
275                   &src_mem, {{ src_offset }},
276                   {{ size, 1, 1 }}));
277
278   ret_object(rd_ev, hev);
279   return CL_SUCCESS;
280
281} catch (error &e) {
282   return e.get();
283}
284
285CLOVER_API cl_int
286clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
287                        cl_mem d_dst_mem,
288                        const size_t *src_origin, const size_t *dst_origin,
289                        const size_t *region,
290                        size_t src_row_pitch, size_t src_slice_pitch,
291                        size_t dst_row_pitch, size_t dst_slice_pitch,
292                        cl_uint num_deps, const cl_event *d_deps,
293                        cl_event *rd_ev) try {
294   auto &q = obj(d_q);
295   auto &src_mem = obj(d_src_mem);
296   auto &dst_mem = obj(d_dst_mem);
297   auto deps = objs<wait_list_tag>(d_deps, num_deps);
298
299   validate_common(q, { src_mem, dst_mem }, deps);
300
301   hard_event *hev = new hard_event(
302      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
303      soft_copy_op(q,
304                   &dst_mem, vector(dst_origin),
305                   {{ 1, dst_row_pitch, dst_slice_pitch }},
306                   &src_mem, vector(src_origin),
307                   {{ 1, src_row_pitch, src_slice_pitch }},
308                   vector(region)));
309
310   ret_object(rd_ev, hev);
311   return CL_SUCCESS;
312
313} catch (error &e) {
314   return e.get();
315}
316
317CLOVER_API cl_int
318clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
319                   const size_t *origin, const size_t *region,
320                   size_t row_pitch, size_t slice_pitch, void *ptr,
321                   cl_uint num_deps, const cl_event *d_deps,
322                   cl_event *rd_ev) try {
323   auto &q = obj(d_q);
324   auto &img = obj<image>(d_mem);
325   auto deps = objs<wait_list_tag>(d_deps, num_deps);
326
327   validate_common(q, { img }, deps);
328
329   if (!ptr)
330      throw error(CL_INVALID_VALUE);
331
332   hard_event *hev = new hard_event(
333      q, CL_COMMAND_READ_IMAGE, deps,
334      soft_copy_op(q,
335                   ptr, {},
336                   {{ 1, row_pitch, slice_pitch }},
337                   &img, vector(origin),
338                   {{ 1, img.row_pitch(), img.slice_pitch() }},
339                   vector(region)));
340
341   ret_object(rd_ev, hev);
342   return CL_SUCCESS;
343
344} catch (error &e) {
345   return e.get();
346}
347
348CLOVER_API cl_int
349clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
350                    const size_t *origin, const size_t *region,
351                    size_t row_pitch, size_t slice_pitch, const void *ptr,
352                    cl_uint num_deps, const cl_event *d_deps,
353                    cl_event *rd_ev) try {
354   auto &q = obj(d_q);
355   auto &img = obj<image>(d_mem);
356   auto deps = objs<wait_list_tag>(d_deps, num_deps);
357
358   validate_common(q, { img }, deps);
359
360   if (!ptr)
361      throw error(CL_INVALID_VALUE);
362
363   hard_event *hev = new hard_event(
364      q, CL_COMMAND_WRITE_IMAGE, deps,
365      soft_copy_op(q,
366                   &img, vector(origin),
367                   {{ 1, img.row_pitch(), img.slice_pitch() }},
368                   ptr, {},
369                   {{ 1, row_pitch, slice_pitch }},
370                   vector(region)));
371
372   ret_object(rd_ev, hev);
373   return CL_SUCCESS;
374
375} catch (error &e) {
376   return e.get();
377}
378
379CLOVER_API cl_int
380clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
381                   const size_t *src_origin, const size_t *dst_origin,
382                   const size_t *region,
383                   cl_uint num_deps, const cl_event *d_deps,
384                   cl_event *rd_ev) try {
385   auto &q = obj(d_q);
386   auto &src_img = obj<image>(d_src_mem);
387   auto &dst_img = obj<image>(d_dst_mem);
388   auto deps = objs<wait_list_tag>(d_deps, num_deps);
389
390   validate_common(q, { src_img, dst_img }, deps);
391
392   hard_event *hev = new hard_event(
393      q, CL_COMMAND_COPY_IMAGE, deps,
394      hard_copy_op(q,
395                   &dst_img, vector(dst_origin),
396                   &src_img, vector(src_origin),
397                   vector(region)));
398
399   ret_object(rd_ev, hev);
400   return CL_SUCCESS;
401
402} catch (error &e) {
403   return e.get();
404}
405
406CLOVER_API cl_int
407clEnqueueCopyImageToBuffer(cl_command_queue d_q,
408                           cl_mem d_src_mem, cl_mem d_dst_mem,
409                           const size_t *src_origin, const size_t *region,
410                           size_t dst_offset,
411                           cl_uint num_deps, const cl_event *d_deps,
412                           cl_event *rd_ev) try {
413   auto &q = obj(d_q);
414   auto &src_img = obj<image>(d_src_mem);
415   auto &dst_mem = obj(d_dst_mem);
416   auto deps = objs<wait_list_tag>(d_deps, num_deps);
417
418   validate_common(q, { src_img, dst_mem }, deps);
419
420   hard_event *hev = new hard_event(
421      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
422      soft_copy_op(q,
423                   &dst_mem, {{ dst_offset }},
424                   {{ 0, 0, 0 }},
425                   &src_img, vector(src_origin),
426                   {{ 1, src_img.row_pitch(), src_img.slice_pitch() }},
427                   vector(region)));
428
429   ret_object(rd_ev, hev);
430   return CL_SUCCESS;
431
432} catch (error &e) {
433   return e.get();
434}
435
436CLOVER_API cl_int
437clEnqueueCopyBufferToImage(cl_command_queue d_q,
438                           cl_mem d_src_mem, cl_mem d_dst_mem,
439                           size_t src_offset,
440                           const size_t *dst_origin, const size_t *region,
441                           cl_uint num_deps, const cl_event *d_deps,
442                           cl_event *rd_ev) try {
443   auto &q = obj(d_q);
444   auto &src_mem = obj(d_src_mem);
445   auto &dst_img = obj<image>(d_dst_mem);
446   auto deps = objs<wait_list_tag>(d_deps, num_deps);
447
448   validate_common(q, { src_mem, dst_img }, deps);
449
450   hard_event *hev = new hard_event(
451      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
452      soft_copy_op(q,
453                   &dst_img, vector(dst_origin),
454                   {{ 1, dst_img.row_pitch(), dst_img.slice_pitch() }},
455                   &src_mem, {{ src_offset }},
456                   {{ 0, 0, 0 }},
457                   vector(region)));
458
459   ret_object(rd_ev, hev);
460   return CL_SUCCESS;
461
462} catch (error &e) {
463   return e.get();
464}
465
466CLOVER_API void *
467clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
468                   cl_map_flags flags, size_t offset, size_t size,
469                   cl_uint num_deps, const cl_event *d_deps,
470                   cl_event *rd_ev, cl_int *r_errcode) try {
471   auto &q = obj(d_q);
472   auto &mem = obj(d_mem);
473   auto deps = objs<wait_list_tag>(d_deps, num_deps);
474
475   validate_common(q, { mem }, deps);
476
477   if (offset > mem.size() || offset + size > mem.size())
478      throw error(CL_INVALID_VALUE);
479
480   void *map = mem.resource(q).add_map(
481      q, flags, blocking, {{ offset }}, {{ size }});
482
483   ret_object(rd_ev, new hard_event(q, CL_COMMAND_MAP_BUFFER, deps));
484   ret_error(r_errcode, CL_SUCCESS);
485   return map;
486
487} catch (error &e) {
488   ret_error(r_errcode, e);
489   return NULL;
490}
491
492CLOVER_API void *
493clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
494                  cl_map_flags flags,
495                  const size_t *origin, const size_t *region,
496                  size_t *row_pitch, size_t *slice_pitch,
497                  cl_uint num_deps, const cl_event *d_deps,
498                  cl_event *rd_ev, cl_int *r_errcode) try {
499   auto &q = obj(d_q);
500   auto &img = obj<image>(d_mem);
501   auto deps = objs<wait_list_tag>(d_deps, num_deps);
502
503   validate_common(q, { img }, deps);
504
505   void *map = img.resource(q).add_map(
506      q, flags, blocking, vector(origin), vector(region));
507
508   ret_object(rd_ev, new hard_event(q, CL_COMMAND_MAP_IMAGE, deps));
509   ret_error(r_errcode, CL_SUCCESS);
510   return map;
511
512} catch (error &e) {
513   ret_error(r_errcode, e);
514   return NULL;
515}
516
517CLOVER_API cl_int
518clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
519                        cl_uint num_deps, const cl_event *d_deps,
520                        cl_event *rd_ev) try {
521   auto &q = obj(d_q);
522   auto &mem = obj(d_mem);
523   auto deps = objs<wait_list_tag>(d_deps, num_deps);
524
525   validate_common(q, { mem }, deps);
526
527   hard_event *hev = new hard_event(
528      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
529      [=, &q, &mem](event &) {
530         mem.resource(q).del_map(ptr);
531      });
532
533   ret_object(rd_ev, hev);
534   return CL_SUCCESS;
535
536} catch (error &e) {
537   return e.get();
538}
539