1//
2// Copyright 2012 Francisco Jerez
3//
4// Permission is hereby granted, free of charge, to any person obtaining a
5// copy of this software and associated documentation files (the "Software"),
6// to deal in the Software without restriction, including without limitation
7// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8// and/or sell copies of the Software, and to permit persons to whom the
9// Software is furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17// THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20// SOFTWARE.
21//
22
23#include <cstring>
24
25#include "api/util.hpp"
26#include "core/event.hpp"
27#include "core/resource.hpp"
28
29using namespace clover;
30
31namespace {
32   typedef resource::point point;
33
34   ///
35   /// Common argument checking shared by memory transfer commands.
36   ///
37   void
38   validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) {
39      if (!q)
40         throw error(CL_INVALID_COMMAND_QUEUE);
41
42      if (bool(num_deps) != bool(deps) ||
43          any_of(is_zero<cl_event>(), deps, deps + num_deps))
44         throw error(CL_INVALID_EVENT_WAIT_LIST);
45
46      if (any_of([&](const cl_event ev) {
47               return &ev->ctx != &q->ctx;
48            }, deps, deps + num_deps))
49         throw error(CL_INVALID_CONTEXT);
50   }
51
52   ///
53   /// Memory object-specific argument checking shared by most memory
54   /// transfer commands.
55   ///
56   void
57   validate_obj(cl_command_queue q, cl_mem obj) {
58      if (!obj)
59         throw error(CL_INVALID_MEM_OBJECT);
60
61      if (&obj->ctx != &q->ctx)
62         throw error(CL_INVALID_CONTEXT);
63   }
64
65   ///
66   /// Class that encapsulates the task of mapping an object of type
67   /// \a T.  The return value of get() should be implicitly
68   /// convertible to \a void *.
69   ///
70   template<typename T> struct __map;
71
72   template<> struct __map<void *> {
73      static void *
74      get(cl_command_queue q, void *obj, cl_map_flags flags,
75          size_t offset, size_t size) {
76         return (char *)obj + offset;
77      }
78   };
79
80   template<> struct __map<const void *> {
81      static const void *
82      get(cl_command_queue q, const void *obj, cl_map_flags flags,
83          size_t offset, size_t size) {
84         return (const char *)obj + offset;
85      }
86   };
87
88   template<> struct __map<memory_obj *> {
89      static mapping
90      get(cl_command_queue q, memory_obj *obj, cl_map_flags flags,
91          size_t offset, size_t size) {
92         return { *q, obj->resource(q), flags, true, { offset }, { size }};
93      }
94   };
95
96   ///
97   /// Software copy from \a src_obj to \a dst_obj.  They can be
98   /// either pointers or memory objects.
99   ///
100   template<typename T, typename S>
101   std::function<void (event &)>
102   soft_copy_op(cl_command_queue q,
103                T dst_obj, const point &dst_orig, const point &dst_pitch,
104                S src_obj, const point &src_orig, const point &src_pitch,
105                const point &region) {
106      return [=](event &) {
107         auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE,
108                                  dst_pitch(dst_orig), dst_pitch(region));
109         auto src = __map<S>::get(q, src_obj, CL_MAP_READ,
110                                  src_pitch(src_orig), src_pitch(region));
111         point p;
112
113         for (p[2] = 0; p[2] < region[2]; ++p[2]) {
114            for (p[1] = 0; p[1] < region[1]; ++p[1]) {
115               std::memcpy(static_cast<char *>(dst) + dst_pitch(p),
116                           static_cast<const char *>(src) + src_pitch(p),
117                           src_pitch[0] * region[0]);
118            }
119         }
120      };
121   }
122
123   ///
124   /// Hardware copy from \a src_obj to \a dst_obj.
125   ///
126   template<typename T, typename S>
127   std::function<void (event &)>
128   hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig,
129                S src_obj, const point &src_orig, const point &region) {
130      return [=](event &) {
131         dst_obj->resource(q).copy(*q, dst_orig, region,
132                                   src_obj->resource(q), src_orig);
133      };
134   }
135}
136
137PUBLIC cl_int
138clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
139                    size_t offset, size_t size, void *ptr,
140                    cl_uint num_deps, const cl_event *deps,
141                    cl_event *ev) try {
142   validate_base(q, num_deps, deps);
143   validate_obj(q, obj);
144
145   if (!ptr || offset > obj->size() || offset + size > obj->size())
146      throw error(CL_INVALID_VALUE);
147
148   hard_event *hev = new hard_event(
149      *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps },
150      soft_copy_op(q,
151                   ptr, { 0 }, { 1 },
152                   obj, { offset }, { 1 },
153                   { size, 1, 1 }));
154
155   ret_object(ev, hev);
156   return CL_SUCCESS;
157
158} catch (error &e) {
159   return e.get();
160}
161
162PUBLIC cl_int
163clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
164                     size_t offset, size_t size, const void *ptr,
165                     cl_uint num_deps, const cl_event *deps,
166                     cl_event *ev) try {
167   validate_base(q, num_deps, deps);
168   validate_obj(q, obj);
169
170   if (!ptr || offset > obj->size() || offset + size > obj->size())
171      throw error(CL_INVALID_VALUE);
172
173   hard_event *hev = new hard_event(
174      *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps },
175      soft_copy_op(q,
176                   obj, { offset }, { 1 },
177                   ptr, { 0 }, { 1 },
178                   { size, 1, 1 }));
179
180   ret_object(ev, hev);
181   return CL_SUCCESS;
182
183} catch (error &e) {
184   return e.get();
185}
186
187PUBLIC cl_int
188clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
189                        const size_t *obj_origin, const size_t *host_origin,
190                        const size_t *region,
191                        size_t obj_row_pitch, size_t obj_slice_pitch,
192                        size_t host_row_pitch, size_t host_slice_pitch,
193                        void *ptr,
194                        cl_uint num_deps, const cl_event *deps,
195                        cl_event *ev) try {
196   validate_base(q, num_deps, deps);
197   validate_obj(q, obj);
198
199   if (!ptr)
200      throw error(CL_INVALID_VALUE);
201
202   hard_event *hev = new hard_event(
203      *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps },
204      soft_copy_op(q,
205                   ptr, host_origin,
206                   { 1, host_row_pitch, host_slice_pitch },
207                   obj, obj_origin,
208                   { 1, obj_row_pitch, obj_slice_pitch },
209                   region));
210
211   ret_object(ev, hev);
212   return CL_SUCCESS;
213
214} catch (error &e) {
215   return e.get();
216}
217
218PUBLIC cl_int
219clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
220                         const size_t *obj_origin, const size_t *host_origin,
221                         const size_t *region,
222                         size_t obj_row_pitch, size_t obj_slice_pitch,
223                         size_t host_row_pitch, size_t host_slice_pitch,
224                         const void *ptr,
225                         cl_uint num_deps, const cl_event *deps,
226                         cl_event *ev) try {
227   validate_base(q, num_deps, deps);
228   validate_obj(q, obj);
229
230   if (!ptr)
231      throw error(CL_INVALID_VALUE);
232
233   hard_event *hev = new hard_event(
234      *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps },
235      soft_copy_op(q,
236                   obj, obj_origin,
237                   { 1, obj_row_pitch, obj_slice_pitch },
238                   ptr, host_origin,
239                   { 1, host_row_pitch, host_slice_pitch },
240                   region));
241
242   ret_object(ev, hev);
243   return CL_SUCCESS;
244
245} catch (error &e) {
246   return e.get();
247}
248
249PUBLIC cl_int
250clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
251                    size_t src_offset, size_t dst_offset, size_t size,
252                    cl_uint num_deps, const cl_event *deps,
253                    cl_event *ev) try {
254   validate_base(q, num_deps, deps);
255   validate_obj(q, src_obj);
256   validate_obj(q, dst_obj);
257
258   hard_event *hev = new hard_event(
259      *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps },
260      hard_copy_op(q, dst_obj, { dst_offset },
261                   src_obj, { src_offset },
262                   { size, 1, 1 }));
263
264   ret_object(ev, hev);
265   return CL_SUCCESS;
266
267} catch (error &e) {
268   return e.get();
269}
270
271PUBLIC cl_int
272clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
273                        const size_t *src_origin, const size_t *dst_origin,
274                        const size_t *region,
275                        size_t src_row_pitch, size_t src_slice_pitch,
276                        size_t dst_row_pitch, size_t dst_slice_pitch,
277                        cl_uint num_deps, const cl_event *deps,
278                        cl_event *ev) try {
279   validate_base(q, num_deps, deps);
280   validate_obj(q, src_obj);
281   validate_obj(q, dst_obj);
282
283   hard_event *hev = new hard_event(
284      *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps },
285      soft_copy_op(q,
286                   dst_obj, dst_origin,
287                   { 1, dst_row_pitch, dst_slice_pitch },
288                   src_obj, src_origin,
289                   { 1, src_row_pitch, src_slice_pitch },
290                   region));
291
292   ret_object(ev, hev);
293   return CL_SUCCESS;
294
295} catch (error &e) {
296   return e.get();
297}
298
299PUBLIC cl_int
300clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
301                   const size_t *origin, const size_t *region,
302                   size_t row_pitch, size_t slice_pitch, void *ptr,
303                   cl_uint num_deps, const cl_event *deps,
304                   cl_event *ev) try {
305   image *img = dynamic_cast<image *>(obj);
306
307   validate_base(q, num_deps, deps);
308   validate_obj(q, img);
309
310   if (!ptr)
311      throw error(CL_INVALID_VALUE);
312
313   hard_event *hev = new hard_event(
314      *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps },
315      soft_copy_op(q,
316                   ptr, {},
317                   { 1, row_pitch, slice_pitch },
318                   obj, origin,
319                   { 1, img->row_pitch(), img->slice_pitch() },
320                   region));
321
322   ret_object(ev, hev);
323   return CL_SUCCESS;
324
325} catch (error &e) {
326   return e.get();
327}
328
329PUBLIC cl_int
330clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
331                    const size_t *origin, const size_t *region,
332                    size_t row_pitch, size_t slice_pitch, const void *ptr,
333                    cl_uint num_deps, const cl_event *deps,
334                    cl_event *ev) try {
335   image *img = dynamic_cast<image *>(obj);
336
337   validate_base(q, num_deps, deps);
338   validate_obj(q, img);
339
340   if (!ptr)
341      throw error(CL_INVALID_VALUE);
342
343   hard_event *hev = new hard_event(
344      *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps },
345      soft_copy_op(q,
346                   obj, origin,
347                   { 1, img->row_pitch(), img->slice_pitch() },
348                   ptr, {},
349                   { 1, row_pitch, slice_pitch },
350                   region));
351
352   ret_object(ev, hev);
353   return CL_SUCCESS;
354
355} catch (error &e) {
356   return e.get();
357}
358
359PUBLIC cl_int
360clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
361                   const size_t *src_origin, const size_t *dst_origin,
362                   const size_t *region,
363                   cl_uint num_deps, const cl_event *deps,
364                   cl_event *ev) try {
365   image *src_img = dynamic_cast<image *>(src_obj);
366   image *dst_img = dynamic_cast<image *>(dst_obj);
367
368   validate_base(q, num_deps, deps);
369   validate_obj(q, src_img);
370   validate_obj(q, dst_img);
371
372   hard_event *hev = new hard_event(
373      *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps },
374      hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region));
375
376   ret_object(ev, hev);
377   return CL_SUCCESS;
378
379} catch (error &e) {
380   return e.get();
381}
382
383PUBLIC cl_int
384clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
385                           const size_t *src_origin, const size_t *region,
386                           size_t dst_offset,
387                           cl_uint num_deps, const cl_event *deps,
388                           cl_event *ev) try {
389   image *src_img = dynamic_cast<image *>(src_obj);
390
391   validate_base(q, num_deps, deps);
392   validate_obj(q, src_img);
393   validate_obj(q, dst_obj);
394
395   hard_event *hev = new hard_event(
396      *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps },
397      soft_copy_op(q,
398                   dst_obj, { dst_offset },
399                   { 0, 0, 0 },
400                   src_obj, src_origin,
401                   { 1, src_img->row_pitch(), src_img->slice_pitch() },
402                   region));
403
404   ret_object(ev, hev);
405   return CL_SUCCESS;
406
407} catch (error &e) {
408   return e.get();
409}
410
411PUBLIC cl_int
412clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
413                           size_t src_offset,
414                           const size_t *dst_origin, const size_t *region,
415                           cl_uint num_deps, const cl_event *deps,
416                           cl_event *ev) try {
417   image *dst_img = dynamic_cast<image *>(src_obj);
418
419   validate_base(q, num_deps, deps);
420   validate_obj(q, src_obj);
421   validate_obj(q, dst_img);
422
423   hard_event *hev = new hard_event(
424      *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps },
425      soft_copy_op(q,
426                   dst_obj, dst_origin,
427                   { 1, dst_img->row_pitch(), dst_img->slice_pitch() },
428                   src_obj, { src_offset },
429                   { 0, 0, 0 },
430                   region));
431
432   ret_object(ev, hev);
433   return CL_SUCCESS;
434
435} catch (error &e) {
436   return e.get();
437}
438
439PUBLIC void *
440clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
441                   cl_map_flags flags, size_t offset, size_t size,
442                   cl_uint num_deps, const cl_event *deps,
443                   cl_event *ev, cl_int *errcode_ret) try {
444   validate_base(q, num_deps, deps);
445   validate_obj(q, obj);
446
447   if (offset > obj->size() || offset + size > obj->size())
448      throw error(CL_INVALID_VALUE);
449
450   void *map = obj->resource(q).add_map(
451      *q, flags, blocking, { offset }, { size });
452
453   ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER,
454                                 { deps, deps + num_deps }));
455   ret_error(errcode_ret, CL_SUCCESS);
456   return map;
457
458} catch (error &e) {
459   ret_error(errcode_ret, e);
460   return NULL;
461}
462
463PUBLIC void *
464clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
465                  cl_map_flags flags,
466                  const size_t *origin, const size_t *region,
467                  size_t *row_pitch, size_t *slice_pitch,
468                  cl_uint num_deps, const cl_event *deps,
469                  cl_event *ev, cl_int *errcode_ret) try {
470   image *img = dynamic_cast<image *>(obj);
471
472   validate_base(q, num_deps, deps);
473   validate_obj(q, img);
474
475   void *map = obj->resource(q).add_map(
476      *q, flags, blocking, origin, region);
477
478   ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE,
479                                 { deps, deps + num_deps }));
480   ret_error(errcode_ret, CL_SUCCESS);
481   return map;
482
483} catch (error &e) {
484   ret_error(errcode_ret, e);
485   return NULL;
486}
487
488PUBLIC cl_int
489clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr,
490                        cl_uint num_deps, const cl_event *deps,
491                        cl_event *ev) try {
492   validate_base(q, num_deps, deps);
493   validate_obj(q, obj);
494
495   hard_event *hev = new hard_event(
496      *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps },
497      [=](event &) {
498         obj->resource(q).del_map(ptr);
499      });
500
501   ret_object(ev, hev);
502   return CL_SUCCESS;
503
504} catch (error &e) {
505   return e.get();
506}
507