accelerate.c revision 9ff5002b42d7121a87c8ad4eb6dd2a632630f70a
1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3%                                                                             %
4%                                                                             %
5%                                                                             %
6%     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7%    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8%    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9%    A   A   C       C      E      L      E      R R    A   A    T    E       %
10%    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11%                                                                             %
12%                                                                             %
13%                       MagickCore Acceleration Methods                       %
14%                                                                             %
15%                              Software Design                                %
16%                                  Cristy                                     %
17%                               SiuChi Chan                                   %
18%                               Guansong Zhang                                %
19%                               January 2010                                  %
20%                                                                             %
21%                                                                             %
22%  Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization      %
23%  dedicated to making software imaging solutions freely available.           %
24%                                                                             %
25%  You may not use this file except in compliance with the License.  You may  %
26%  obtain a copy of the License at                                            %
27%                                                                             %
28%    http://www.imagemagick.org/script/license.php                            %
29%                                                                             %
30%  Unless required by applicable law or agreed to in writing, software        %
31%  distributed under the License is distributed on an "AS IS" BASIS,          %
32%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
33%  See the License for the specific language governing permissions and        %
34%  limitations under the License.                                             %
35%                                                                             %
36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37*/
38
39/*
40Include declarations.
41*/
42#include "MagickCore/studio.h"
43#include "MagickCore/accelerate.h"
44#include "MagickCore/accelerate-private.h"
45#include "MagickCore/artifact.h"
46#include "MagickCore/cache.h"
47#include "MagickCore/cache-private.h"
48#include "MagickCore/cache-view.h"
49#include "MagickCore/color-private.h"
50#include "MagickCore/delegate-private.h"
51#include "MagickCore/enhance.h"
52#include "MagickCore/exception.h"
53#include "MagickCore/exception-private.h"
54#include "MagickCore/gem.h"
55#include "MagickCore/hashmap.h"
56#include "MagickCore/image.h"
57#include "MagickCore/image-private.h"
58#include "MagickCore/list.h"
59#include "MagickCore/memory_.h"
60#include "MagickCore/monitor-private.h"
61#include "MagickCore/accelerate.h"
62#include "MagickCore/opencl.h"
63#include "MagickCore/opencl-private.h"
64#include "MagickCore/option.h"
65#include "MagickCore/pixel-accessor.h"
66#include "MagickCore/pixel-private.h"
67#include "MagickCore/prepress.h"
68#include "MagickCore/quantize.h"
69#include "MagickCore/quantum-private.h"
70#include "MagickCore/random_.h"
71#include "MagickCore/random-private.h"
72#include "MagickCore/registry.h"
73#include "MagickCore/resize.h"
74#include "MagickCore/resize-private.h"
75#include "MagickCore/semaphore.h"
76#include "MagickCore/splay-tree.h"
77#include "MagickCore/statistic.h"
78#include "MagickCore/string_.h"
79#include "MagickCore/string-private.h"
80#include "MagickCore/token.h"
81
82#ifdef MAGICKCORE_CLPERFMARKER
83#include "CLPerfMarker.h"
84#endif
85
/*
  MAGICK_MAX() / MAGICK_MIN() yield the larger / smaller of two values.  On a
  tie the first argument is the one returned.  Both are plain macros, so each
  argument may be evaluated twice: do not pass expressions with side effects.
*/
#define MAGICK_MAX(x,y)  ( ((x) >= (y)) ? (x) : (y) )
#define MAGICK_MIN(x,y)  ( ((x) <= (y)) ? (x) : (y) )
88
89#if defined(MAGICKCORE_OPENCL_SUPPORT)
90
/*
  ALIGNED() is true when the address held in `pointer` is a multiple of
  sizeof(type).  The test masks the low address bits with sizeof(type)-1, so
  it is only meaningful when sizeof(type) is a power of two.

  The address is converted through size_t rather than long: long is narrower
  than a pointer on LLP64 targets (e.g. 64-bit Windows), which makes the
  pointer-to-long cast a truncating, warning-prone conversion.
*/
#define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92
/*
  Pad the global workgroup size to the next multiple of the local workgroup
  size.

    o orgGlobalSize: the unpadded global size.

    o localGroupSize: the local workgroup size.  A value of 0 has no multiples
      to round to, so orgGlobalSize is returned unchanged instead of dividing
      by zero.

  Returns the smallest multiple of localGroupSize that is >= orgGlobalSize.
  The padding is derived from the remainder so that no intermediate sum can
  wrap around when orgGlobalSize is close to UINT_MAX.
*/
static inline unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
  const unsigned int orgGlobalSize,const unsigned int localGroupSize)
{
  unsigned int
    excess;

  if (localGroupSize == 0)
    return(orgGlobalSize);
  excess=orgGlobalSize % localGroupSize;
  if (excess == 0)
    return(orgGlobalSize);
  return(orgGlobalSize+(localGroupSize-excess));
}
100
101static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
102{
103  MagickBooleanType
104    flag;
105
106  MagickCLEnv
107    clEnv;
108
109  clEnv=GetDefaultOpenCLEnv();
110
111  GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED,
112    sizeof(MagickBooleanType),&flag,exception);
113  if (flag != MagickFalse)
114    return(MagickFalse);
115
116  GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED,
117    sizeof(MagickBooleanType),&flag,exception);
118  if (flag == MagickFalse)
119    {
120      if (InitOpenCLEnv(clEnv,exception) == MagickFalse)
121        return(MagickFalse);
122
123      GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED,
124        sizeof(MagickBooleanType),&flag,exception);
125      if (flag != MagickFalse)
126        return(MagickFalse);
127    }
128
129  return(MagickTrue);
130}
131
132static MagickBooleanType checkAccelerateCondition(const Image* image,
133  const ChannelType channel)
134{
135  /* check if the image's colorspace is supported */
136  if (image->colorspace != RGBColorspace &&
137      image->colorspace != sRGBColorspace &&
138      image->colorspace != GRAYColorspace)
139    return(MagickFalse);
140
141  /* check if the channel is supported */
142  if (((channel & RedChannel) == 0) ||
143      ((channel & GreenChannel) == 0) ||
144      ((channel & BlueChannel) == 0))
145    return(MagickFalse);
146
147  /* check if the virtual pixel method is compatible with the OpenCL implementation */
148  if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
149      (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
150    return(MagickFalse);
151
152  /* check if the image has read / write mask */
153  if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
154    return(MagickFalse);
155
156  /* check if pixel order is RGBA */
157  if (GetPixelChannelOffset(image,RedPixelChannel) != 0 ||
158      GetPixelChannelOffset(image,GreenPixelChannel) != 1 ||
159      GetPixelChannelOffset(image,BluePixelChannel) != 2 ||
160      GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
161    return(MagickFalse);
162
163  /* check if all channels are available */
164  if (((GetPixelRedTraits(image) & UpdatePixelTrait) == 0) ||
165      ((GetPixelGreenTraits(image) & UpdatePixelTrait) == 0) ||
166      ((GetPixelBlueTraits(image) & UpdatePixelTrait) == 0) ||
167      ((GetPixelAlphaTraits(image) & UpdatePixelTrait) == 0))
168    return(MagickFalse);
169
170  return(MagickTrue);
171}
172
173static MagickBooleanType checkHistogramCondition(Image *image,
174  const ChannelType channel)
175{
176  /* ensure this is the only pass get in for now. */
177  if ((channel & SyncChannels) == 0)
178    return MagickFalse;
179
180  if (image->intensity == Rec601LuminancePixelIntensityMethod ||
181      image->intensity == Rec709LuminancePixelIntensityMethod)
182    return MagickFalse;
183
184  if (image->colorspace != sRGBColorspace)
185    return MagickFalse;
186
187  return MagickTrue;
188}
189
190static MagickBooleanType splitImage(const Image* image)
191{
192  MagickBooleanType
193    split;
194
195  MagickCLEnv
196    clEnv;
197
198  unsigned long
199    allocSize,
200    tempSize;
201
202  clEnv=GetDefaultOpenCLEnv();
203
204  allocSize=GetOpenCLDeviceMaxMemAllocSize(clEnv);
205  tempSize=image->columns * image->rows * 4 * 4;
206
207  split = ((tempSize > allocSize) ? MagickTrue : MagickFalse);
208  return(split);
209}
210
211/*
212%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
213%                                                                             %
214%                                                                             %
215%                                                                             %
216%     C o n v o l v e I m a g e  w i t h  O p e n C L                         %
217%                                                                             %
218%                                                                             %
219%                                                                             %
220%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
221%
222%  ConvolveImage() applies a custom convolution kernel to the image.
223%
224%  The format of the ConvolveImage method is:
225%
226%      Image *ConvolveImage(const Image *image,const size_t order,
227%        const double *kernel,ExceptionInfo *exception)
228%      Image *ConvolveImageChannel(const Image *image,const ChannelType channel,
229%        const size_t order,const double *kernel,ExceptionInfo *exception)
230%
231%  A description of each parameter follows:
232%
233%    o image: the image.
234%
235%    o channel: the channel type.
236%
237%    o kernel: kernel info.
238%
239%    o exception: return any errors or warnings in this structure.
240%
241*/
242
243static Image *ComputeConvolveImage(const Image* image,
244  const ChannelType channel,const KernelInfo *kernel,ExceptionInfo *exception)
245{
246  CacheView
247    *filteredImage_view,
248    *image_view;
249
250  cl_command_queue
251    queue;
252
253  cl_context
254    context;
255
256  cl_kernel
257    clkernel;
258
259  cl_int
260    clStatus;
261
262  cl_mem
263    convolutionKernel,
264    filteredImageBuffer,
265    imageBuffer;
266
267  cl_mem_flags
268    mem_flags;
269
270  cl_ulong
271    deviceLocalMemorySize;
272
273  const void
274    *inputPixels;
275
276  float
277    *kernelBufferPtr;
278
279  Image
280    *filteredImage;
281
282  MagickBooleanType
283    outputReady;
284
285  MagickCLEnv
286    clEnv;
287
288  MagickSizeType
289    length;
290
291  size_t
292    global_work_size[3],
293    localGroupSize[3],
294    localMemoryRequirement;
295
296  unsigned
297    kernelSize;
298
299  unsigned int
300    filterHeight,
301    filterWidth,
302    i,
303    imageHeight,
304    imageWidth,
305    matte;
306
307  void
308    *filteredPixels,
309    *hostPtr;
310
311  /* intialize all CL objects to NULL */
312  context = NULL;
313  imageBuffer = NULL;
314  filteredImageBuffer = NULL;
315  convolutionKernel = NULL;
316  clkernel = NULL;
317  queue = NULL;
318
319  filteredImage = NULL;
320  filteredImage_view = NULL;
321  outputReady = MagickFalse;
322
323  clEnv = GetDefaultOpenCLEnv();
324  context = GetOpenCLContext(clEnv);
325
326  image_view=AcquireVirtualCacheView(image,exception);
327  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
328  if (inputPixels == (const void *) NULL)
329  {
330    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
331    goto cleanup;
332  }
333
334  /* Create and initialize OpenCL buffers. */
335
336  /* If the host pointer is aligned to the size of CLPixelPacket,
337     then use the host buffer directly from the GPU; otherwise,
338     create a buffer on the GPU and copy the data over */
339  if (ALIGNED(inputPixels,CLPixelPacket))
340  {
341    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
342  }
343  else
344  {
345    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
346  }
347  /* create a CL buffer from image pixel buffer */
348  length = image->columns * image->rows;
349  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
350  if (clStatus != CL_SUCCESS)
351  {
352    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
353    goto cleanup;
354  }
355
356  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
357  assert(filteredImage != NULL);
358  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
359  {
360    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
361    goto cleanup;
362  }
363  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
364  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
365  if (filteredPixels == (void *) NULL)
366  {
367    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
368    goto cleanup;
369  }
370
371  if (ALIGNED(filteredPixels,CLPixelPacket))
372  {
373    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
374    hostPtr = filteredPixels;
375  }
376  else
377  {
378    mem_flags = CL_MEM_WRITE_ONLY;
379    hostPtr = NULL;
380  }
381  /* create a CL buffer from image pixel buffer */
382  length = image->columns * image->rows;
383  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
384  if (clStatus != CL_SUCCESS)
385  {
386    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
387    goto cleanup;
388  }
389
390  kernelSize = kernel->width * kernel->height;
391  convolutionKernel = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
392  if (clStatus != CL_SUCCESS)
393  {
394    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
395    goto cleanup;
396  }
397
398  queue = AcquireOpenCLCommandQueue(clEnv);
399
400  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
401          , 0, NULL, NULL, &clStatus);
402  if (clStatus != CL_SUCCESS)
403  {
404    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
405    goto cleanup;
406  }
407  for (i = 0; i < kernelSize; i++)
408  {
409    kernelBufferPtr[i] = (float) kernel->values[i];
410  }
411  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
412  if (clStatus != CL_SUCCESS)
413  {
414    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
415    goto cleanup;
416  }
417  clEnv->library->clFlush(queue);
418
419  deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
420
421  /* Compute the local memory requirement for a 16x16 workgroup.
422     If it's larger than 16k, reduce the workgroup size to 8x8 */
423  localGroupSize[0] = 16;
424  localGroupSize[1] = 16;
425  localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
426    + kernel->width*kernel->height*sizeof(float);
427
428  if (localMemoryRequirement > deviceLocalMemorySize)
429  {
430    localGroupSize[0] = 8;
431    localGroupSize[1] = 8;
432    localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
433      + kernel->width*kernel->height*sizeof(float);
434  }
435  if (localMemoryRequirement <= deviceLocalMemorySize)
436  {
437    /* get the OpenCL kernel */
438    clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ConvolveOptimized");
439    if (clkernel == NULL)
440    {
441      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
442      goto cleanup;
443    }
444
445    /* set the kernel arguments */
446    i = 0;
447    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
448    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
449    imageWidth = image->columns;
450    imageHeight = image->rows;
451    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
452    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
453    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
454    filterWidth = kernel->width;
455    filterHeight = kernel->height;
456    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
457    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
458    matte = (image->alpha_trait==BlendPixelTrait)?1:0;
459    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
460    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
461    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
462    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
463    if (clStatus != CL_SUCCESS)
464    {
465      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
466      goto cleanup;
467    }
468
469    /* pad the global size to a multiple of the local work size dimension */
470    global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
471    global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
472
473    /* launch the kernel */
474    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
475    if (clStatus != CL_SUCCESS)
476    {
477      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
478      goto cleanup;
479    }
480  }
481  else
482  {
483    /* get the OpenCL kernel */
484    clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
485    if (clkernel == NULL)
486    {
487      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
488      goto cleanup;
489    }
490
491    /* set the kernel arguments */
492    i = 0;
493    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
494    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
495    imageWidth = image->columns;
496    imageHeight = image->rows;
497    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
498    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
499    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
500    filterWidth = kernel->width;
501    filterHeight = kernel->height;
502    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
503    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
504    matte = (image->alpha_trait==BlendPixelTrait)?1:0;
505    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
506    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
507    if (clStatus != CL_SUCCESS)
508    {
509      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
510      goto cleanup;
511    }
512
513    localGroupSize[0] = 8;
514    localGroupSize[1] = 8;
515    global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
516    global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
517    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, NULL);
518
519    if (clStatus != CL_SUCCESS)
520    {
521      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
522      goto cleanup;
523    }
524  }
525  clEnv->library->clFlush(queue);
526
527  if (ALIGNED(filteredPixels,CLPixelPacket))
528  {
529    length = image->columns * image->rows;
530    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
531  }
532  else
533  {
534    length = image->columns * image->rows;
535    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
536  }
537  if (clStatus != CL_SUCCESS)
538  {
539    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
540    goto cleanup;
541  }
542
543  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
544
545cleanup:
546  OpenCLLogException(__FUNCTION__,__LINE__,exception);
547
548  image_view=DestroyCacheView(image_view);
549  if (filteredImage_view != NULL)
550    filteredImage_view=DestroyCacheView(filteredImage_view);
551
552  if (imageBuffer != NULL)
553    clEnv->library->clReleaseMemObject(imageBuffer);
554
555  if (filteredImageBuffer != NULL)
556    clEnv->library->clReleaseMemObject(filteredImageBuffer);
557
558  if (convolutionKernel != NULL)
559    clEnv->library->clReleaseMemObject(convolutionKernel);
560
561  if (clkernel != NULL)
562    RelinquishOpenCLKernel(clEnv, clkernel);
563
564  if (queue != NULL)
565    RelinquishOpenCLCommandQueue(clEnv, queue);
566
567  if (outputReady == MagickFalse)
568  {
569    if (filteredImage != NULL)
570    {
571      DestroyImage(filteredImage);
572      filteredImage = NULL;
573    }
574  }
575
576  return(filteredImage);
577}
578
579MagickExport Image *AccelerateConvolveImageChannel(const Image *image,
580  const ChannelType channel,const KernelInfo *kernel,ExceptionInfo *exception)
581{
582  Image
583    *filteredImage;
584
585  assert(image != NULL);
586  assert(kernel != (KernelInfo *) NULL);
587  assert(exception != (ExceptionInfo *) NULL);
588
589  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
590      (checkAccelerateCondition(image, channel) == MagickFalse))
591    return NULL;
592
593  filteredImage=ComputeConvolveImage(image, channel, kernel, exception);
594  return(filteredImage);
595}
596
597static MagickBooleanType ComputeFunctionImage(Image *image,
598  const ChannelType channel,const MagickFunction function,
599  const size_t number_parameters,const double *parameters,
600  ExceptionInfo *exception)
601{
602  CacheView
603    *image_view;
604
605  cl_command_queue
606    queue;
607
608  cl_context
609    context;
610
611  cl_int
612    clStatus;
613
614  cl_kernel
615    clkernel;
616
617  cl_mem
618    imageBuffer,
619    parametersBuffer;
620
621  cl_mem_flags
622    mem_flags;
623
624  float
625    *parametersBufferPtr;
626
627  MagickBooleanType
628    status;
629
630  MagickCLEnv
631    clEnv;
632
633  MagickSizeType
634    length;
635
636  size_t
637    globalWorkSize[2];
638
639  unsigned int
640    i;
641
642  void
643    *pixels;
644
645  status = MagickFalse;
646
647  context = NULL;
648  clkernel = NULL;
649  queue = NULL;
650  imageBuffer = NULL;
651  parametersBuffer = NULL;
652
653  clEnv = GetDefaultOpenCLEnv();
654  context = GetOpenCLContext(clEnv);
655
656  image_view=AcquireAuthenticCacheView(image,exception);
657  pixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
658  if (pixels == (void *) NULL)
659  {
660    (void) OpenCLThrowMagickException(exception, GetMagickModule(), CacheWarning,
661      "GetPixelCachePixels failed.",
662      "'%s'", image->filename);
663    goto cleanup;
664  }
665
666
667  if (ALIGNED(pixels,CLPixelPacket))
668  {
669    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
670  }
671  else
672  {
673    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
674  }
675  /* create a CL buffer from image pixel buffer */
676  length = image->columns * image->rows;
677  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
678  if (clStatus != CL_SUCCESS)
679  {
680    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
681    goto cleanup;
682  }
683
684  parametersBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
685  if (clStatus != CL_SUCCESS)
686  {
687    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
688    goto cleanup;
689  }
690
691  queue = AcquireOpenCLCommandQueue(clEnv);
692
693  parametersBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
694                , 0, NULL, NULL, &clStatus);
695  if (clStatus != CL_SUCCESS)
696  {
697    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
698    goto cleanup;
699  }
700  for (i = 0; i < number_parameters; i++)
701  {
702    parametersBufferPtr[i] = (float)parameters[i];
703  }
704  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
705  if (clStatus != CL_SUCCESS)
706  {
707    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
708    goto cleanup;
709  }
710  clEnv->library->clFlush(queue);
711
712  clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "FunctionImage");
713  if (clkernel == NULL)
714  {
715    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
716    goto cleanup;
717  }
718
719  /* set the kernel arguments */
720  i = 0;
721  clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
722  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
723  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
724  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
725  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
726  if (clStatus != CL_SUCCESS)
727  {
728    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
729    goto cleanup;
730  }
731
732  globalWorkSize[0] = image->columns;
733  globalWorkSize[1] = image->rows;
734  /* launch the kernel */
735  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
736  if (clStatus != CL_SUCCESS)
737  {
738    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
739    goto cleanup;
740  }
741  clEnv->library->clFlush(queue);
742
743
744  if (ALIGNED(pixels,CLPixelPacket))
745  {
746    length = image->columns * image->rows;
747    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
748  }
749  else
750  {
751    length = image->columns * image->rows;
752    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
753  }
754  if (clStatus != CL_SUCCESS)
755  {
756    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
757    goto cleanup;
758  }
759  status=SyncCacheViewAuthenticPixels(image_view,exception);
760
761cleanup:
762  OpenCLLogException(__FUNCTION__,__LINE__,exception);
763
764  image_view=DestroyCacheView(image_view);
765
766  if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
767  if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
768  if (imageBuffer != NULL) clEnv->library->clReleaseMemObject(imageBuffer);
769  if (parametersBuffer != NULL) clEnv->library->clReleaseMemObject(parametersBuffer);
770
771  return(status);
772}
773
774MagickExport MagickBooleanType AccelerateFunctionImage(Image *image,
775  const ChannelType channel,const MagickFunction function,
776  const size_t number_parameters,const double *parameters,
777  ExceptionInfo *exception)
778{
779  MagickBooleanType
780    status;
781
782  assert(image != NULL);
783  assert(exception != (ExceptionInfo *) NULL);
784
785  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
786      (checkAccelerateCondition(image, channel) == MagickFalse))
787    return(MagickFalse);
788
789  status=ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
790  return(status);
791}
792
793/*
794%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
795%                                                                             %
796%                                                                             %
797%                                                                             %
798%     B l u r I m a g e  w i t h  O p e n C L                                 %
799%                                                                             %
800%                                                                             %
801%                                                                             %
802%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
803%
804%  BlurImage() blurs an image.  We convolve the image with a Gaussian operator
805%  of the given radius and standard deviation (sigma).  For reasonable results,
806%  the radius should be larger than sigma.  Use a radius of 0 and BlurImage()
807%  selects a suitable radius for you.
808%
809%  The format of the BlurImage method is:
810%
811%      Image *BlurImage(const Image *image,const double radius,
812%        const double sigma,ExceptionInfo *exception)
813%      Image *BlurImageChannel(const Image *image,const ChannelType channel,
814%        const double radius,const double sigma,ExceptionInfo *exception)
815%
816%  A description of each parameter follows:
817%
818%    o image: the image.
819%
820%    o channel: the channel type.
821%
822%    o radius: the radius of the Gaussian, in pixels, not counting the center
823%      pixel.
824%
825%    o sigma: the standard deviation of the Gaussian, in pixels.
826%
827%    o exception: return any errors or warnings in this structure.
828%
829*/
830
831static Image *ComputeBlurImage(const Image* image,const ChannelType channel,
832  const double radius,const double sigma,ExceptionInfo *exception)
833{
834  CacheView
835    *filteredImage_view,
836    *image_view;
837
838  char
839    geometry[MaxTextExtent];
840
841  cl_command_queue
842    queue;
843
844  cl_context
845    context;
846
847  cl_int
848    clStatus;
849
850  cl_kernel
851    blurColumnKernel,
852    blurRowKernel;
853
854  cl_mem
855    filteredImageBuffer,
856    imageBuffer,
857    imageKernelBuffer,
858    tempImageBuffer;
859
860  cl_mem_flags
861    mem_flags;
862
863  const void
864    *inputPixels;
865
866  float
867    *kernelBufferPtr;
868
869  Image
870    *filteredImage;
871
872  MagickBooleanType
873    outputReady;
874
875  MagickCLEnv
876    clEnv;
877
878  MagickSizeType
879    length;
880
881  KernelInfo
882    *kernel;
883
884  unsigned int
885    i,
886    imageColumns,
887    imageRows,
888    kernelWidth;
889
890  void
891    *filteredPixels,
892    *hostPtr;
893
894  context = NULL;
895  filteredImage = NULL;
896  filteredImage_view = NULL;
897  imageBuffer = NULL;
898  tempImageBuffer = NULL;
899  filteredImageBuffer = NULL;
900  imageKernelBuffer = NULL;
901  blurRowKernel = NULL;
902  blurColumnKernel = NULL;
903  queue = NULL;
904  kernel = NULL;
905
906  outputReady = MagickFalse;
907
908  clEnv = GetDefaultOpenCLEnv();
909  context = GetOpenCLContext(clEnv);
910  queue = AcquireOpenCLCommandQueue(clEnv);
911
912  /* Create and initialize OpenCL buffers. */
913  {
914    image_view=AcquireVirtualCacheView(image,exception);
915    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
916    if (inputPixels == (const void *) NULL)
917    {
918      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
919      goto cleanup;
920    }
921    /* If the host pointer is aligned to the size of CLPixelPacket,
922     then use the host buffer directly from the GPU; otherwise,
923     create a buffer on the GPU and copy the data over */
924    if (ALIGNED(inputPixels,CLPixelPacket))
925    {
926      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
927    }
928    else
929    {
930      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
931    }
932    /* create a CL buffer from image pixel buffer */
933    length = image->columns * image->rows;
934    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
935    if (clStatus != CL_SUCCESS)
936    {
937      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
938      goto cleanup;
939    }
940  }
941
942  /* create output */
943  {
944    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
945    assert(filteredImage != NULL);
946    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
947    {
948      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
949      goto cleanup;
950    }
951    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
952    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
953    if (filteredPixels == (void *) NULL)
954    {
955      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
956      goto cleanup;
957    }
958
959    if (ALIGNED(filteredPixels,CLPixelPacket))
960    {
961      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
962      hostPtr = filteredPixels;
963    }
964    else
965    {
966      mem_flags = CL_MEM_WRITE_ONLY;
967      hostPtr = NULL;
968    }
969    /* create a CL buffer from image pixel buffer */
970    length = image->columns * image->rows;
971    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
972    if (clStatus != CL_SUCCESS)
973    {
974      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
975      goto cleanup;
976    }
977  }
978
979  /* create processing kernel */
980  {
981    (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
982    kernel=AcquireKernelInfo(geometry);
983    if (kernel == (KernelInfo *) NULL)
984    {
985      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
986      goto cleanup;
987    }
988
989    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
990    if (clStatus != CL_SUCCESS)
991    {
992      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
993      goto cleanup;
994    }
995    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
996    if (clStatus != CL_SUCCESS)
997    {
998      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
999      goto cleanup;
1000    }
1001
1002    for (i = 0; i < kernel->width; i++)
1003    {
1004      kernelBufferPtr[i] = (float) kernel->values[i];
1005    }
1006
1007    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1008    if (clStatus != CL_SUCCESS)
1009    {
1010      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
1011      goto cleanup;
1012    }
1013  }
1014
1015  {
1016
1017    /* create temp buffer */
1018    {
1019      length = image->columns * image->rows;
1020      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1021      if (clStatus != CL_SUCCESS)
1022      {
1023        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1024        goto cleanup;
1025      }
1026    }
1027
1028    /* get the OpenCL kernels */
1029    {
1030      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
1031      if (blurRowKernel == NULL)
1032      {
1033        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1034        goto cleanup;
1035      };
1036
1037      blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
1038      if (blurColumnKernel == NULL)
1039      {
1040        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1041        goto cleanup;
1042      };
1043    }
1044
1045    {
1046      /* need logic to decide this value */
1047      int chunkSize = 256;
1048
1049      {
1050        imageColumns = image->columns;
1051        imageRows = image->rows;
1052
1053        /* set the kernel arguments */
1054        i = 0;
1055        clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1056        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1057        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1058        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1059        kernelWidth = kernel->width;
1060        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1061        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1062        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1063        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1064        if (clStatus != CL_SUCCESS)
1065        {
1066          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1067          goto cleanup;
1068        }
1069      }
1070
1071      /* launch the kernel */
1072      {
1073        size_t gsize[2];
1074        size_t wsize[2];
1075
1076        gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
1077        gsize[1] = image->rows;
1078        wsize[0] = chunkSize;
1079        wsize[1] = 1;
1080
1081        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1082        if (clStatus != CL_SUCCESS)
1083        {
1084          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1085          goto cleanup;
1086        }
1087        clEnv->library->clFlush(queue);
1088      }
1089    }
1090
1091    {
1092      /* need logic to decide this value */
1093      int chunkSize = 256;
1094
1095      {
1096        imageColumns = image->columns;
1097        imageRows = image->rows;
1098
1099        /* set the kernel arguments */
1100        i = 0;
1101        clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1102        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1103        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1104        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1105        kernelWidth = kernel->width;
1106        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1107        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1108        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1109        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
1110        if (clStatus != CL_SUCCESS)
1111        {
1112          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1113          goto cleanup;
1114        }
1115      }
1116
1117      /* launch the kernel */
1118      {
1119        size_t gsize[2];
1120        size_t wsize[2];
1121
1122        gsize[0] = image->columns;
1123        gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
1124        wsize[0] = 1;
1125        wsize[1] = chunkSize;
1126
1127        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1128        if (clStatus != CL_SUCCESS)
1129        {
1130          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1131          goto cleanup;
1132        }
1133        clEnv->library->clFlush(queue);
1134      }
1135    }
1136
1137  }
1138
1139  /* get result */
1140  if (ALIGNED(filteredPixels,CLPixelPacket))
1141  {
1142    length = image->columns * image->rows;
1143    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1144  }
1145  else
1146  {
1147    length = image->columns * image->rows;
1148    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1149  }
1150  if (clStatus != CL_SUCCESS)
1151  {
1152    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
1153    goto cleanup;
1154  }
1155
1156  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1157
1158cleanup:
1159  OpenCLLogException(__FUNCTION__,__LINE__,exception);
1160
1161  image_view=DestroyCacheView(image_view);
1162  if (filteredImage_view != NULL)
1163    filteredImage_view=DestroyCacheView(filteredImage_view);
1164
1165  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
1166  if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
1167  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
1168  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
1169  if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
1170  if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1171  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
1172  if (kernel!=NULL)               DestroyKernelInfo(kernel);
1173  if (outputReady == MagickFalse && filteredImage != NULL)
1174    filteredImage=DestroyImage(filteredImage);
1175  return(filteredImage);
1176}
1177
1178static Image* ComputeBlurImageSection(const Image* image,
1179  const ChannelType channel,const double radius,const double sigma,
1180  ExceptionInfo *exception)
1181{
1182  CacheView
1183    *filteredImage_view,
1184    *image_view;
1185
1186  char
1187    geometry[MaxTextExtent];
1188
1189  cl_command_queue
1190    queue;
1191
1192  cl_int
1193    clStatus;
1194
1195  cl_kernel
1196    blurColumnKernel,
1197    blurRowKernel;
1198
1199  cl_mem
1200    imageBuffer,
1201    tempImageBuffer,
1202    filteredImageBuffer,
1203    imageKernelBuffer;
1204
1205  cl_mem_flags
1206    mem_flags;
1207
1208  cl_context
1209    context;
1210
1211  const void
1212    *inputPixels;
1213
1214  float
1215    *kernelBufferPtr;
1216
1217  Image
1218    *filteredImage;
1219
1220  KernelInfo
1221    *kernel;
1222
1223  MagickBooleanType
1224    outputReady;
1225
1226  MagickCLEnv
1227    clEnv;
1228
1229  MagickSizeType
1230    length;
1231
1232  unsigned int
1233    i,
1234    imageColumns,
1235    imageRows,
1236    kernelWidth;
1237
1238  void
1239    *filteredPixels,
1240    *hostPtr;
1241
1242  context = NULL;
1243  filteredImage = NULL;
1244  filteredImage_view = NULL;
1245  imageBuffer = NULL;
1246  tempImageBuffer = NULL;
1247  filteredImageBuffer = NULL;
1248  imageKernelBuffer = NULL;
1249  blurRowKernel = NULL;
1250  blurColumnKernel = NULL;
1251  queue = NULL;
1252  kernel = NULL;
1253
1254  outputReady = MagickFalse;
1255
1256  clEnv = GetDefaultOpenCLEnv();
1257  context = GetOpenCLContext(clEnv);
1258  queue = AcquireOpenCLCommandQueue(clEnv);
1259
1260  /* Create and initialize OpenCL buffers. */
1261  {
1262    image_view=AcquireVirtualCacheView(image,exception);
1263    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
1264    if (inputPixels == (const void *) NULL)
1265    {
1266      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1267      goto cleanup;
1268    }
1269    /* If the host pointer is aligned to the size of CLPixelPacket,
1270     then use the host buffer directly from the GPU; otherwise,
1271     create a buffer on the GPU and copy the data over */
1272    if (ALIGNED(inputPixels,CLPixelPacket))
1273    {
1274      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1275    }
1276    else
1277    {
1278      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1279    }
1280    /* create a CL buffer from image pixel buffer */
1281    length = image->columns * image->rows;
1282    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1283    if (clStatus != CL_SUCCESS)
1284    {
1285      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1286      goto cleanup;
1287    }
1288  }
1289
1290  /* create output */
1291  {
1292    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1293    assert(filteredImage != NULL);
1294    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1295    {
1296      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1297      goto cleanup;
1298    }
1299    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1300    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1301    if (filteredPixels == (void *) NULL)
1302    {
1303      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1304      goto cleanup;
1305    }
1306
1307    if (ALIGNED(filteredPixels,CLPixelPacket))
1308    {
1309      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1310      hostPtr = filteredPixels;
1311    }
1312    else
1313    {
1314      mem_flags = CL_MEM_WRITE_ONLY;
1315      hostPtr = NULL;
1316    }
1317    /* create a CL buffer from image pixel buffer */
1318    length = image->columns * image->rows;
1319    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1320    if (clStatus != CL_SUCCESS)
1321    {
1322      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1323      goto cleanup;
1324    }
1325  }
1326
1327  /* create processing kernel */
1328  {
1329    (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1330    kernel=AcquireKernelInfo(geometry);
1331    if (kernel == (KernelInfo *) NULL)
1332    {
1333      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
1334      goto cleanup;
1335    }
1336
1337    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
1338    if (clStatus != CL_SUCCESS)
1339    {
1340      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1341      goto cleanup;
1342    }
1343    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1344    if (clStatus != CL_SUCCESS)
1345    {
1346      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1347      goto cleanup;
1348    }
1349
1350    for (i = 0; i < kernel->width; i++)
1351    {
1352      kernelBufferPtr[i] = (float) kernel->values[i];
1353    }
1354
1355    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1356    if (clStatus != CL_SUCCESS)
1357    {
1358      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
1359      goto cleanup;
1360    }
1361  }
1362
1363  {
1364    unsigned int offsetRows;
1365    unsigned int sec;
1366
1367    /* create temp buffer */
1368    {
1369      length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2);
1370      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1371      if (clStatus != CL_SUCCESS)
1372      {
1373        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1374        goto cleanup;
1375      }
1376    }
1377
1378    /* get the OpenCL kernels */
1379    {
1380      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
1381      if (blurRowKernel == NULL)
1382      {
1383        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1384        goto cleanup;
1385      };
1386
1387      blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumnSection");
1388      if (blurColumnKernel == NULL)
1389      {
1390        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1391        goto cleanup;
1392      };
1393    }
1394
1395    for (sec = 0; sec < 2; sec++)
1396    {
1397      {
1398        /* need logic to decide this value */
1399        int chunkSize = 256;
1400
1401        {
1402          imageColumns = image->columns;
1403          if (sec == 0)
1404            imageRows = image->rows / 2 + (kernel->width-1) / 2;
1405          else
1406            imageRows = (image->rows - image->rows / 2) + (kernel->width-1) / 2;
1407
1408          offsetRows = sec * image->rows / 2;
1409
1410          kernelWidth = kernel->width;
1411
1412          /* set the kernel arguments */
1413          i = 0;
1414          clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1415          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1416          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1417          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1418          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1419          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1420          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1421          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
1422          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1423          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
1424          if (clStatus != CL_SUCCESS)
1425          {
1426            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1427            goto cleanup;
1428          }
1429        }
1430
1431        /* launch the kernel */
1432        {
1433          size_t gsize[2];
1434          size_t wsize[2];
1435
1436          gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
1437          gsize[1] = imageRows;
1438          wsize[0] = chunkSize;
1439          wsize[1] = 1;
1440
1441          clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1442          if (clStatus != CL_SUCCESS)
1443          {
1444            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1445            goto cleanup;
1446          }
1447          clEnv->library->clFlush(queue);
1448        }
1449      }
1450
1451      {
1452        /* need logic to decide this value */
1453        int chunkSize = 256;
1454
1455        {
1456          imageColumns = image->columns;
1457          if (sec == 0)
1458            imageRows = image->rows / 2;
1459          else
1460            imageRows = (image->rows - image->rows / 2);
1461
1462          offsetRows = sec * image->rows / 2;
1463
1464          kernelWidth = kernel->width;
1465
1466          /* set the kernel arguments */
1467          i = 0;
1468          clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1469          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1470          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1471          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1472          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1473          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1474          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1475          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *)NULL);
1476          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1477          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
1478          if (clStatus != CL_SUCCESS)
1479          {
1480            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1481            goto cleanup;
1482          }
1483        }
1484
1485        /* launch the kernel */
1486        {
1487          size_t gsize[2];
1488          size_t wsize[2];
1489
1490          gsize[0] = imageColumns;
1491          gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
1492          wsize[0] = 1;
1493          wsize[1] = chunkSize;
1494
1495          clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
1496          if (clStatus != CL_SUCCESS)
1497          {
1498            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1499            goto cleanup;
1500          }
1501          clEnv->library->clFlush(queue);
1502        }
1503      }
1504    }
1505
1506  }
1507
1508  /* get result */
1509  if (ALIGNED(filteredPixels,CLPixelPacket))
1510  {
1511    length = image->columns * image->rows;
1512    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1513  }
1514  else
1515  {
1516    length = image->columns * image->rows;
1517    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1518  }
1519  if (clStatus != CL_SUCCESS)
1520  {
1521    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
1522    goto cleanup;
1523  }
1524
1525  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1526
1527cleanup:
1528  OpenCLLogException(__FUNCTION__,__LINE__,exception);
1529
1530  image_view=DestroyCacheView(image_view);
1531  if (filteredImage_view != NULL)
1532    filteredImage_view=DestroyCacheView(filteredImage_view);
1533
1534  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
1535  if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
1536  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
1537  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
1538  if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
1539  if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1540  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
1541  if (kernel!=NULL)               DestroyKernelInfo(kernel);
1542  if (outputReady == MagickFalse)
1543  {
1544    if (filteredImage != NULL)
1545    {
1546      DestroyImage(filteredImage);
1547      filteredImage = NULL;
1548    }
1549  }
1550  return filteredImage;
1551}
1552
1553static Image *ComputeUnsharpMaskImageSingle(const Image *image,
1554  const ChannelType channel,const double radius,const double sigma,
1555  const double gain,const double threshold,int blurOnly, ExceptionInfo *exception);
1556
1557static Image* ComputeBlurImageSingle(const Image* image,
1558  const ChannelType channel,const double radius,const double sigma,
1559  ExceptionInfo *exception)
1560{
1561  return ComputeUnsharpMaskImageSingle(image, channel, radius, sigma, 0.0, 0.0, 1, exception);
1562}
1563
1564MagickExport Image* AccelerateBlurImage(const Image *image,
1565  const ChannelType channel,const double radius,const double sigma,
1566  ExceptionInfo *exception)
1567{
1568  Image
1569    *filteredImage;
1570
1571  assert(image != NULL);
1572  assert(exception != (ExceptionInfo *) NULL);
1573
1574  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1575      (checkAccelerateCondition(image, channel) == MagickFalse))
1576    return NULL;
1577
1578  if (radius < 12.1)
1579	filteredImage=ComputeBlurImageSingle(image, channel, radius, sigma, exception);
1580  else if (splitImage(image) && (image->rows / 2 > radius))
1581    filteredImage=ComputeBlurImageSection(image, channel, radius, sigma, exception);
1582  else
1583    filteredImage=ComputeBlurImage(image, channel, radius, sigma, exception);
1584
1585  return(filteredImage);
1586}
1587
1588/*
1589%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1590%                                                                             %
1591%                                                                             %
1592%                                                                             %
1593%     R o t a t i o n a l B l u r I m a g e  w i t h  O p e n C L             %
1594%                                                                             %
1595%                                                                             %
1596%                                                                             %
1597%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1598%
1599%  RotationalBlurImage() applies a rotational blur to the image.
1600%
1601%  Andrew Protano contributed this effect.
1602%
1603%  The format of the RotationalBlurImage method is:
1604%
1605%    Image *RotationalBlurImage(const Image *image,const double angle,
1606%      ExceptionInfo *exception)
1607%    Image *RotationalBlurImageChannel(const Image *image,const ChannelType channel,
1608%      const double angle,ExceptionInfo *exception)
1609%
1610%  A description of each parameter follows:
1611%
1612%    o image: the image.
1613%
1614%    o channel: the channel type.
1615%
1616%    o angle: the angle of the rotational blur.
1617%
1618%    o exception: return any errors or warnings in this structure.
1619%
1620*/
1621
1622static Image* ComputeRotationalBlurImage(const Image *image,
1623  const ChannelType channel,const double angle,ExceptionInfo *exception)
1624{
1625  CacheView
1626    *image_view,
1627    *filteredImage_view;
1628
1629  cl_command_queue
1630    queue;
1631
1632  cl_context
1633    context;
1634
1635  cl_float2
1636    blurCenter;
1637
1638  cl_float4
1639    biasPixel;
1640
1641  cl_int
1642    clStatus;
1643
1644  cl_mem
1645    cosThetaBuffer,
1646    filteredImageBuffer,
1647    imageBuffer,
1648    sinThetaBuffer;
1649
1650  cl_mem_flags
1651    mem_flags;
1652
1653  cl_kernel
1654    rotationalBlurKernel;
1655
1656  const void
1657    *inputPixels;
1658
1659  float
1660    blurRadius,
1661    *cosThetaPtr,
1662    offset,
1663    *sinThetaPtr,
1664    theta;
1665
1666  Image
1667    *filteredImage;
1668
1669  MagickBooleanType
1670    outputReady;
1671
1672  MagickCLEnv
1673    clEnv;
1674
1675  PixelInfo
1676    bias;
1677
1678  MagickSizeType
1679    length;
1680
1681  size_t
1682    global_work_size[2];
1683
1684  unsigned int
1685    cossin_theta_size,
1686    i,
1687    matte;
1688
1689  void
1690    *filteredPixels,
1691    *hostPtr;
1692
1693  outputReady = MagickFalse;
1694  context = NULL;
1695  filteredImage = NULL;
1696  filteredImage_view = NULL;
1697  imageBuffer = NULL;
1698  filteredImageBuffer = NULL;
1699  sinThetaBuffer = NULL;
1700  cosThetaBuffer = NULL;
1701  queue = NULL;
1702  rotationalBlurKernel = NULL;
1703
1704
1705  clEnv = GetDefaultOpenCLEnv();
1706  context = GetOpenCLContext(clEnv);
1707
1708
1709  /* Create and initialize OpenCL buffers. */
1710
1711  image_view=AcquireVirtualCacheView(image,exception);
1712  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
1713  if (inputPixels == (const void *) NULL)
1714  {
1715    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1716    goto cleanup;
1717  }
1718
1719  /* If the host pointer is aligned to the size of CLPixelPacket,
1720     then use the host buffer directly from the GPU; otherwise,
1721     create a buffer on the GPU and copy the data over */
1722  if (ALIGNED(inputPixels,CLPixelPacket))
1723  {
1724    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1725  }
1726  else
1727  {
1728    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1729  }
1730  /* create a CL buffer from image pixel buffer */
1731  length = image->columns * image->rows;
1732  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1733  if (clStatus != CL_SUCCESS)
1734  {
1735    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1736    goto cleanup;
1737  }
1738
1739
1740  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1741  assert(filteredImage != NULL);
1742  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1743  {
1744    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1745    goto cleanup;
1746  }
1747  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1748  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1749  if (filteredPixels == (void *) NULL)
1750  {
1751    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1752    goto cleanup;
1753  }
1754
1755  if (ALIGNED(filteredPixels,CLPixelPacket))
1756  {
1757    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1758    hostPtr = filteredPixels;
1759  }
1760  else
1761  {
1762    mem_flags = CL_MEM_WRITE_ONLY;
1763    hostPtr = NULL;
1764  }
1765  /* create a CL buffer from image pixel buffer */
1766  length = image->columns * image->rows;
1767  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1768  if (clStatus != CL_SUCCESS)
1769  {
1770    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1771    goto cleanup;
1772  }
1773
1774  blurCenter.s[0] = (float) (image->columns-1)/2.0;
1775  blurCenter.s[1] = (float) (image->rows-1)/2.0;
1776  blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
1777  cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
1778
1779  /* create a buffer for sin_theta and cos_theta */
1780  sinThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1781  if (clStatus != CL_SUCCESS)
1782  {
1783    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1784    goto cleanup;
1785  }
1786  cosThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
1787  if (clStatus != CL_SUCCESS)
1788  {
1789    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1790    goto cleanup;
1791  }
1792
1793
1794  queue = AcquireOpenCLCommandQueue(clEnv);
1795  sinThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1796  if (clStatus != CL_SUCCESS)
1797  {
1798    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1799    goto cleanup;
1800  }
1801
1802  cosThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
1803  if (clStatus != CL_SUCCESS)
1804  {
1805    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
1806    goto cleanup;
1807  }
1808
1809  theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
1810  offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
1811  for (i=0; i < (ssize_t) cossin_theta_size; i++)
1812  {
1813    cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
1814    sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
1815  }
1816
1817  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
1818  clStatus |= clEnv->library->clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
1819  if (clStatus != CL_SUCCESS)
1820  {
1821    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
1822    goto cleanup;
1823  }
1824
1825  /* get the OpenCL kernel */
1826  rotationalBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RotationalBlur");
1827  if (rotationalBlurKernel == NULL)
1828  {
1829    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1830    goto cleanup;
1831  }
1832
1833
1834  /* set the kernel arguments */
1835  i = 0;
1836  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1837  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1838
1839  GetPixelInfo(image,&bias);
1840  biasPixel.s[0] = bias.red;
1841  biasPixel.s[1] = bias.green;
1842  biasPixel.s[2] = bias.blue;
1843  biasPixel.s[3] = bias.alpha;
1844  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float4), &biasPixel);
1845  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &channel);
1846
1847  matte = (image->alpha_trait==BlendPixelTrait)?1:0;
1848  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &matte);
1849
1850  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
1851
1852  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
1853  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
1854  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
1855  if (clStatus != CL_SUCCESS)
1856  {
1857    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1858    goto cleanup;
1859  }
1860
1861
1862  global_work_size[0] = image->columns;
1863  global_work_size[1] = image->rows;
1864  /* launch the kernel */
1865  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, rotationalBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
1866  if (clStatus != CL_SUCCESS)
1867  {
1868    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1869    goto cleanup;
1870  }
1871  clEnv->library->clFlush(queue);
1872
1873  if (ALIGNED(filteredPixels,CLPixelPacket))
1874  {
1875    length = image->columns * image->rows;
1876    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1877  }
1878  else
1879  {
1880    length = image->columns * image->rows;
1881    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1882  }
1883  if (clStatus != CL_SUCCESS)
1884  {
1885    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
1886    goto cleanup;
1887  }
1888  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1889
1890cleanup:
1891  OpenCLLogException(__FUNCTION__,__LINE__,exception);
1892
1893  image_view=DestroyCacheView(image_view);
1894  if (filteredImage_view != NULL)
1895    filteredImage_view=DestroyCacheView(filteredImage_view);
1896
1897  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
1898  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
1899  if (sinThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(sinThetaBuffer);
1900  if (cosThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(cosThetaBuffer);
1901  if (rotationalBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, rotationalBlurKernel);
1902  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
1903  if (outputReady == MagickFalse)
1904  {
1905    if (filteredImage != NULL)
1906    {
1907      DestroyImage(filteredImage);
1908      filteredImage = NULL;
1909    }
1910  }
1911  return filteredImage;
1912}
1913
1914MagickExport Image* AccelerateRotationalBlurImage(const Image *image,
1915  const ChannelType channel,const double angle,ExceptionInfo *exception)
1916{
1917  Image
1918    *filteredImage;
1919
1920  assert(image != NULL);
1921  assert(exception != (ExceptionInfo *) NULL);
1922
1923  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1924      (checkAccelerateCondition(image, channel) == MagickFalse))
1925    return NULL;
1926
1927  filteredImage=ComputeRotationalBlurImage(image, channel, angle, exception);
1928  return filteredImage;
1929}
1930
1931/*
1932%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1933%                                                                             %
1934%                                                                             %
1935%                                                                             %
1936%     U n s h a r p M a s k I m a g e  w i t h  O p e n C L                   %
1937%                                                                             %
1938%                                                                             %
1939%                                                                             %
1940%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1941%
1942%  UnsharpMaskImage() sharpens one or more image channels.  We convolve the
1943%  image with a Gaussian operator of the given radius and standard deviation
1944%  (sigma).  For reasonable results, radius should be larger than sigma.  Use a
1945%  radius of 0 and UnsharpMaskImage() selects a suitable radius for you.
1946%
1947%  The format of the UnsharpMaskImage method is:
1948%
1949%    Image *UnsharpMaskImage(const Image *image,const double radius,
1950%      const double sigma,const double amount,const double threshold,
1951%      ExceptionInfo *exception)
1952%    Image *UnsharpMaskImageChannel(const Image *image,
1953%      const ChannelType channel,const double radius,const double sigma,
1954%      const double gain,const double threshold,ExceptionInfo *exception)
1955%
1956%  A description of each parameter follows:
1957%
1958%    o image: the image.
1959%
1960%    o channel: the channel type.
1961%
1962%    o radius: the radius of the Gaussian, in pixels, not counting the center
1963%      pixel.
1964%
1965%    o sigma: the standard deviation of the Gaussian, in pixels.
1966%
1967%    o gain: the percentage of the difference between the original and the
1968%      blur image that is added back into the original.
1969%
1970%    o threshold: the threshold in pixels needed to apply the difference gain.
1971%
1972%    o exception: return any errors or warnings in this structure.
1973%
1974*/
1975
1976static Image *ComputeUnsharpMaskImage(const Image *image,
1977  const ChannelType channel,const double radius,const double sigma,
1978  const double gain,const double threshold,ExceptionInfo *exception)
1979{
1980  CacheView
1981    *filteredImage_view,
1982    *image_view;
1983
1984  char
1985    geometry[MaxTextExtent];
1986
1987  cl_command_queue
1988    queue;
1989
1990  cl_context
1991    context;
1992
1993  cl_int
1994    clStatus;
1995
1996  cl_kernel
1997    blurRowKernel,
1998    unsharpMaskBlurColumnKernel;
1999
2000  cl_mem
2001    filteredImageBuffer,
2002    imageBuffer,
2003    imageKernelBuffer,
2004    tempImageBuffer;
2005
2006  cl_mem_flags
2007    mem_flags;
2008
2009  const void
2010    *inputPixels;
2011
2012  float
2013    fGain,
2014    fThreshold,
2015    *kernelBufferPtr;
2016
2017  Image
2018    *filteredImage;
2019
2020  int
2021    chunkSize;
2022
2023  KernelInfo
2024    *kernel;
2025
2026  MagickBooleanType
2027    outputReady;
2028
2029  MagickCLEnv
2030    clEnv;
2031
2032  MagickSizeType
2033    length;
2034
2035  void
2036    *filteredPixels,
2037    *hostPtr;
2038
2039  unsigned int
2040    i,
2041    imageColumns,
2042    imageRows,
2043    kernelWidth;
2044
2045  clEnv = NULL;
2046  filteredImage = NULL;
2047  filteredImage_view = NULL;
2048  kernel = NULL;
2049  context = NULL;
2050  imageBuffer = NULL;
2051  filteredImageBuffer = NULL;
2052  tempImageBuffer = NULL;
2053  imageKernelBuffer = NULL;
2054  blurRowKernel = NULL;
2055  unsharpMaskBlurColumnKernel = NULL;
2056  queue = NULL;
2057  outputReady = MagickFalse;
2058
2059  clEnv = GetDefaultOpenCLEnv();
2060  context = GetOpenCLContext(clEnv);
2061  queue = AcquireOpenCLCommandQueue(clEnv);
2062
2063  /* Create and initialize OpenCL buffers. */
2064  {
2065    image_view=AcquireVirtualCacheView(image,exception);
2066    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
2067    if (inputPixels == (const void *) NULL)
2068    {
2069      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2070      goto cleanup;
2071    }
2072
2073    /* If the host pointer is aligned to the size of CLPixelPacket,
2074     then use the host buffer directly from the GPU; otherwise,
2075     create a buffer on the GPU and copy the data over */
2076    if (ALIGNED(inputPixels,CLPixelPacket))
2077    {
2078      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2079    }
2080    else
2081    {
2082      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2083    }
2084    /* create a CL buffer from image pixel buffer */
2085    length = image->columns * image->rows;
2086    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2087    if (clStatus != CL_SUCCESS)
2088    {
2089      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2090      goto cleanup;
2091    }
2092  }
2093
2094  /* create output */
2095  {
2096    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2097    assert(filteredImage != NULL);
2098    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2099    {
2100      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2101      goto cleanup;
2102    }
2103    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2104    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2105    if (filteredPixels == (void *) NULL)
2106    {
2107      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2108      goto cleanup;
2109    }
2110
2111    if (ALIGNED(filteredPixels,CLPixelPacket))
2112    {
2113      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2114      hostPtr = filteredPixels;
2115    }
2116    else
2117    {
2118      mem_flags = CL_MEM_WRITE_ONLY;
2119      hostPtr = NULL;
2120    }
2121
2122    /* create a CL buffer from image pixel buffer */
2123    length = image->columns * image->rows;
2124    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2125    if (clStatus != CL_SUCCESS)
2126    {
2127      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2128      goto cleanup;
2129    }
2130  }
2131
2132  /* create the blur kernel */
2133  {
2134    (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2135    kernel=AcquireKernelInfo(geometry);
2136    if (kernel == (KernelInfo *) NULL)
2137    {
2138      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
2139      goto cleanup;
2140    }
2141
2142    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2143    if (clStatus != CL_SUCCESS)
2144    {
2145      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2146      goto cleanup;
2147    }
2148
2149
2150    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2151    if (clStatus != CL_SUCCESS)
2152    {
2153      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
2154      goto cleanup;
2155    }
2156    for (i = 0; i < kernel->width; i++)
2157    {
2158      kernelBufferPtr[i] = (float) kernel->values[i];
2159    }
2160    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2161    if (clStatus != CL_SUCCESS)
2162    {
2163      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
2164      goto cleanup;
2165    }
2166  }
2167
2168  {
2169    /* create temp buffer */
2170    {
2171      length = image->columns * image->rows;
2172      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
2173      if (clStatus != CL_SUCCESS)
2174      {
2175        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2176        goto cleanup;
2177      }
2178    }
2179
2180    /* get the opencl kernel */
2181    {
2182      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
2183      if (blurRowKernel == NULL)
2184      {
2185        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2186        goto cleanup;
2187      };
2188
2189      unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
2190      if (unsharpMaskBlurColumnKernel == NULL)
2191      {
2192        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2193        goto cleanup;
2194      };
2195    }
2196
2197    {
2198      chunkSize = 256;
2199
2200      imageColumns = image->columns;
2201      imageRows = image->rows;
2202
2203      kernelWidth = kernel->width;
2204
2205      /* set the kernel arguments */
2206      i = 0;
2207      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2208      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2209      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
2210      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2211      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2212      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2213      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2214      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
2215      if (clStatus != CL_SUCCESS)
2216      {
2217        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2218        goto cleanup;
2219      }
2220    }
2221
2222    /* launch the kernel */
2223    {
2224      size_t gsize[2];
2225      size_t wsize[2];
2226
2227      gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
2228      gsize[1] = image->rows;
2229      wsize[0] = chunkSize;
2230      wsize[1] = 1;
2231
2232      clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2233      if (clStatus != CL_SUCCESS)
2234      {
2235        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2236        goto cleanup;
2237      }
2238      clEnv->library->clFlush(queue);
2239    }
2240
2241
2242    {
2243      chunkSize = 256;
2244      imageColumns = image->columns;
2245      imageRows = image->rows;
2246      kernelWidth = kernel->width;
2247      fGain = (float)gain;
2248      fThreshold = (float)threshold;
2249
2250      i = 0;
2251      clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2252      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2253      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2254      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2255      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2256      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
2257      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
2258      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
2259      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2260      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2261      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
2262      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
2263
2264      if (clStatus != CL_SUCCESS)
2265      {
2266        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2267        goto cleanup;
2268      }
2269    }
2270
2271    /* launch the kernel */
2272    {
2273      size_t gsize[2];
2274      size_t wsize[2];
2275
2276      gsize[0] = image->columns;
2277      gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
2278      wsize[0] = 1;
2279      wsize[1] = chunkSize;
2280
2281      clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2282      if (clStatus != CL_SUCCESS)
2283      {
2284        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2285        goto cleanup;
2286      }
2287      clEnv->library->clFlush(queue);
2288    }
2289
2290  }
2291
2292  /* get result */
2293  if (ALIGNED(filteredPixels,CLPixelPacket))
2294  {
2295    length = image->columns * image->rows;
2296    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2297  }
2298  else
2299  {
2300    length = image->columns * image->rows;
2301    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2302  }
2303  if (clStatus != CL_SUCCESS)
2304  {
2305    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
2306    goto cleanup;
2307  }
2308
2309  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2310
2311cleanup:
2312  OpenCLLogException(__FUNCTION__,__LINE__,exception);
2313
2314  image_view=DestroyCacheView(image_view);
2315  if (filteredImage_view != NULL)
2316    filteredImage_view=DestroyCacheView(filteredImage_view);
2317
2318  if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
2319  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
2320  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
2321  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
2322  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
2323  if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
2324  if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
2325  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
2326  if (outputReady == MagickFalse)
2327  {
2328    if (filteredImage != NULL)
2329    {
2330      DestroyImage(filteredImage);
2331      filteredImage = NULL;
2332    }
2333  }
2334  return(filteredImage);
2335}
2336
2337static Image *ComputeUnsharpMaskImageSection(const Image *image,
2338  const ChannelType channel,const double radius,const double sigma,
2339  const double gain,const double threshold,ExceptionInfo *exception)
2340{
2341  CacheView
2342    *filteredImage_view,
2343    *image_view;
2344
2345  char
2346    geometry[MaxTextExtent];
2347
2348  cl_command_queue
2349    queue;
2350
2351  cl_context
2352    context;
2353
2354  cl_int
2355    clStatus;
2356
2357  cl_kernel
2358    blurRowKernel,
2359    unsharpMaskBlurColumnKernel;
2360
2361  cl_mem
2362    filteredImageBuffer,
2363    imageBuffer,
2364    imageKernelBuffer,
2365    tempImageBuffer;
2366
2367  cl_mem_flags
2368    mem_flags;
2369
2370  const void
2371    *inputPixels;
2372
2373  float
2374    fGain,
2375    fThreshold,
2376    *kernelBufferPtr;
2377
2378  Image
2379    *filteredImage;
2380
2381  int
2382    chunkSize;
2383
2384  KernelInfo
2385    *kernel;
2386
2387  MagickBooleanType
2388    outputReady;
2389
2390  MagickCLEnv
2391    clEnv;
2392
2393  MagickSizeType
2394    length;
2395
2396  void
2397    *filteredPixels,
2398    *hostPtr;
2399
2400  unsigned int
2401    i,
2402    imageColumns,
2403    imageRows,
2404    kernelWidth;
2405
2406  clEnv = NULL;
2407  filteredImage = NULL;
2408  filteredImage_view = NULL;
2409  kernel = NULL;
2410  context = NULL;
2411  imageBuffer = NULL;
2412  filteredImageBuffer = NULL;
2413  tempImageBuffer = NULL;
2414  imageKernelBuffer = NULL;
2415  blurRowKernel = NULL;
2416  unsharpMaskBlurColumnKernel = NULL;
2417  queue = NULL;
2418  outputReady = MagickFalse;
2419
2420  clEnv = GetDefaultOpenCLEnv();
2421  context = GetOpenCLContext(clEnv);
2422  queue = AcquireOpenCLCommandQueue(clEnv);
2423
2424  /* Create and initialize OpenCL buffers. */
2425  {
2426    image_view=AcquireVirtualCacheView(image,exception);
2427    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
2428    if (inputPixels == (const void *) NULL)
2429    {
2430      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2431      goto cleanup;
2432    }
2433
2434    /* If the host pointer is aligned to the size of CLPixelPacket,
2435     then use the host buffer directly from the GPU; otherwise,
2436     create a buffer on the GPU and copy the data over */
2437    if (ALIGNED(inputPixels,CLPixelPacket))
2438    {
2439      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2440    }
2441    else
2442    {
2443      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2444    }
2445    /* create a CL buffer from image pixel buffer */
2446    length = image->columns * image->rows;
2447    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2448    if (clStatus != CL_SUCCESS)
2449    {
2450      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2451      goto cleanup;
2452    }
2453  }
2454
2455  /* create output */
2456  {
2457    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2458    assert(filteredImage != NULL);
2459    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2460    {
2461      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2462      goto cleanup;
2463    }
2464    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2465    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2466    if (filteredPixels == (void *) NULL)
2467    {
2468      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2469      goto cleanup;
2470    }
2471
2472    if (ALIGNED(filteredPixels,CLPixelPacket))
2473    {
2474      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2475      hostPtr = filteredPixels;
2476    }
2477    else
2478    {
2479      mem_flags = CL_MEM_WRITE_ONLY;
2480      hostPtr = NULL;
2481    }
2482
2483    /* create a CL buffer from image pixel buffer */
2484    length = image->columns * image->rows;
2485    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2486    if (clStatus != CL_SUCCESS)
2487    {
2488      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2489      goto cleanup;
2490    }
2491  }
2492
2493  /* create the blur kernel */
2494  {
2495    (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2496    kernel=AcquireKernelInfo(geometry);
2497    if (kernel == (KernelInfo *) NULL)
2498    {
2499      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
2500      goto cleanup;
2501    }
2502
2503    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2504    if (clStatus != CL_SUCCESS)
2505    {
2506      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2507      goto cleanup;
2508    }
2509
2510
2511    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2512    if (clStatus != CL_SUCCESS)
2513    {
2514      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
2515      goto cleanup;
2516    }
2517    for (i = 0; i < kernel->width; i++)
2518    {
2519      kernelBufferPtr[i] = (float) kernel->values[i];
2520    }
2521    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2522    if (clStatus != CL_SUCCESS)
2523    {
2524      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
2525      goto cleanup;
2526    }
2527  }
2528
2529  {
2530    unsigned int offsetRows;
2531    unsigned int sec;
2532
2533    /* create temp buffer */
2534    {
2535      length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2);
2536      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
2537      if (clStatus != CL_SUCCESS)
2538      {
2539        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2540        goto cleanup;
2541      }
2542    }
2543
2544    /* get the opencl kernel */
2545    {
2546      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
2547      if (blurRowKernel == NULL)
2548      {
2549        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2550        goto cleanup;
2551      };
2552
2553      unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
2554      if (unsharpMaskBlurColumnKernel == NULL)
2555      {
2556        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2557        goto cleanup;
2558      };
2559    }
2560
2561    for (sec = 0; sec < 2; sec++)
2562    {
2563      {
2564        chunkSize = 256;
2565
2566        imageColumns = image->columns;
2567        if (sec == 0)
2568          imageRows = image->rows / 2 + (kernel->width-1) / 2;
2569        else
2570          imageRows = (image->rows - image->rows / 2) + (kernel->width-1) / 2;
2571
2572        offsetRows = sec * image->rows / 2;
2573
2574        kernelWidth = kernel->width;
2575
2576        /* set the kernel arguments */
2577        i = 0;
2578        clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2579        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2580        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
2581        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2582        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2583        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2584        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2585        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *)NULL);
2586        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2587        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
2588        if (clStatus != CL_SUCCESS)
2589        {
2590          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2591          goto cleanup;
2592        }
2593      }
2594      /* launch the kernel */
2595      {
2596        size_t gsize[2];
2597        size_t wsize[2];
2598
2599        gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
2600        gsize[1] = imageRows;
2601        wsize[0] = chunkSize;
2602        wsize[1] = 1;
2603
2604        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2605        if (clStatus != CL_SUCCESS)
2606        {
2607          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2608          goto cleanup;
2609        }
2610        clEnv->library->clFlush(queue);
2611      }
2612
2613
2614      {
2615        chunkSize = 256;
2616
2617        imageColumns = image->columns;
2618        if (sec == 0)
2619          imageRows = image->rows / 2;
2620        else
2621          imageRows = (image->rows - image->rows / 2);
2622
2623        offsetRows = sec * image->rows / 2;
2624
2625        kernelWidth = kernel->width;
2626
2627        fGain = (float)gain;
2628        fThreshold = (float)threshold;
2629
2630        i = 0;
2631        clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2632        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
2633        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2634        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2635        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2636        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
2637        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
2638        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
2639        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2640        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2641        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
2642        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
2643        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
2644        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
2645
2646        if (clStatus != CL_SUCCESS)
2647        {
2648          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2649          goto cleanup;
2650        }
2651      }
2652
2653      /* launch the kernel */
2654      {
2655        size_t gsize[2];
2656        size_t wsize[2];
2657
2658        gsize[0] = imageColumns;
2659        gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
2660        wsize[0] = 1;
2661        wsize[1] = chunkSize;
2662
2663        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2664        if (clStatus != CL_SUCCESS)
2665        {
2666          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2667          goto cleanup;
2668        }
2669        clEnv->library->clFlush(queue);
2670      }
2671    }
2672  }
2673
2674  /* get result */
2675  if (ALIGNED(filteredPixels,CLPixelPacket))
2676  {
2677    length = image->columns * image->rows;
2678    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2679  }
2680  else
2681  {
2682    length = image->columns * image->rows;
2683    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2684  }
2685  if (clStatus != CL_SUCCESS)
2686  {
2687    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
2688    goto cleanup;
2689  }
2690
2691  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2692
2693cleanup:
2694  OpenCLLogException(__FUNCTION__,__LINE__,exception);
2695
2696  image_view=DestroyCacheView(image_view);
2697  if (filteredImage_view != NULL)
2698    filteredImage_view=DestroyCacheView(filteredImage_view);
2699
2700  if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
2701  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
2702  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
2703  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
2704  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
2705  if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
2706  if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
2707  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
2708  if (outputReady == MagickFalse)
2709  {
2710    if (filteredImage != NULL)
2711    {
2712      DestroyImage(filteredImage);
2713      filteredImage = NULL;
2714    }
2715  }
2716  return filteredImage;
2717}
2718
2719static Image *ComputeUnsharpMaskImageSingle(const Image *image,
2720  const ChannelType channel,const double radius,const double sigma,
2721  const double gain,const double threshold,int blurOnly, ExceptionInfo *exception)
2722{
2723  CacheView
2724    *filteredImage_view,
2725    *image_view;
2726
2727  char
2728    geometry[MaxTextExtent];
2729
2730  cl_command_queue
2731    queue;
2732
2733  cl_context
2734    context;
2735
2736  cl_int
2737    justBlur,
2738    clStatus;
2739
2740  cl_kernel
2741    unsharpMaskKernel;
2742
2743  cl_mem
2744    filteredImageBuffer,
2745    imageBuffer,
2746    imageKernelBuffer;
2747
2748  cl_mem_flags
2749    mem_flags;
2750
2751  const void
2752    *inputPixels;
2753
2754  float
2755    fGain,
2756    fThreshold,
2757    *kernelBufferPtr;
2758
2759  Image
2760    *filteredImage;
2761
2762  KernelInfo
2763    *kernel;
2764
2765  MagickBooleanType
2766    outputReady;
2767
2768  MagickCLEnv
2769    clEnv;
2770
2771  MagickSizeType
2772    length;
2773
2774  void
2775    *filteredPixels,
2776    *hostPtr;
2777
2778  unsigned int
2779    i,
2780    imageColumns,
2781    imageRows,
2782    kernelWidth;
2783
2784  clEnv = NULL;
2785  filteredImage = NULL;
2786  filteredImage_view = NULL;
2787  kernel = NULL;
2788  context = NULL;
2789  imageBuffer = NULL;
2790  filteredImageBuffer = NULL;
2791  imageKernelBuffer = NULL;
2792  unsharpMaskKernel = NULL;
2793  queue = NULL;
2794  outputReady = MagickFalse;
2795
2796  clEnv = GetDefaultOpenCLEnv();
2797  context = GetOpenCLContext(clEnv);
2798  queue = AcquireOpenCLCommandQueue(clEnv);
2799
2800  /* Create and initialize OpenCL buffers. */
2801  {
2802    image_view=AcquireVirtualCacheView(image,exception);
2803    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
2804    if (inputPixels == (const void *) NULL)
2805    {
2806      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2807      goto cleanup;
2808    }
2809
2810    /* If the host pointer is aligned to the size of CLPixelPacket,
2811     then use the host buffer directly from the GPU; otherwise,
2812     create a buffer on the GPU and copy the data over */
2813    if (ALIGNED(inputPixels,CLPixelPacket))
2814    {
2815      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2816    }
2817    else
2818    {
2819      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2820    }
2821    /* create a CL buffer from image pixel buffer */
2822    length = image->columns * image->rows;
2823    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2824    if (clStatus != CL_SUCCESS)
2825    {
2826      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2827      goto cleanup;
2828    }
2829  }
2830
2831  /* create output */
2832  {
2833    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2834    assert(filteredImage != NULL);
2835    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2836    {
2837      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2838      goto cleanup;
2839    }
2840    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2841    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2842    if (filteredPixels == (void *) NULL)
2843    {
2844      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2845      goto cleanup;
2846    }
2847
2848    if (ALIGNED(filteredPixels,CLPixelPacket))
2849    {
2850      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2851      hostPtr = filteredPixels;
2852    }
2853    else
2854    {
2855      mem_flags = CL_MEM_WRITE_ONLY;
2856      hostPtr = NULL;
2857    }
2858
2859    /* create a CL buffer from image pixel buffer */
2860    length = image->columns * image->rows;
2861    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2862    if (clStatus != CL_SUCCESS)
2863    {
2864      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2865      goto cleanup;
2866    }
2867  }
2868
2869  /* create the blur kernel */
2870  {
2871    (void) FormatLocaleString(geometry,MaxTextExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
2872    kernel=AcquireKernelInfo(geometry);
2873    if (kernel == (KernelInfo *) NULL)
2874    {
2875      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
2876      goto cleanup;
2877    }
2878
2879    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
2880    if (clStatus != CL_SUCCESS)
2881    {
2882      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2883      goto cleanup;
2884    }
2885
2886
2887    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
2888    if (clStatus != CL_SUCCESS)
2889    {
2890      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
2891      goto cleanup;
2892    }
2893    for (i = 0; i < kernel->width; i++)
2894    {
2895      kernelBufferPtr[i] = (float) kernel->values[i];
2896    }
2897    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
2898    if (clStatus != CL_SUCCESS)
2899    {
2900      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
2901      goto cleanup;
2902    }
2903  }
2904
2905  {
2906    /* get the opencl kernel */
2907    {
2908      unsharpMaskKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMask");
2909      if (unsharpMaskKernel == NULL)
2910      {
2911        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2912        goto cleanup;
2913      };
2914    }
2915
2916    {
2917      imageColumns = image->columns;
2918      imageRows = image->rows;
2919      kernelWidth = kernel->width;
2920      fGain = (float)gain;
2921      fThreshold = (float)threshold;
2922      justBlur = blurOnly;
2923
2924      /* set the kernel arguments */
2925      i = 0;
2926      clStatus=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2927      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2928      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
2929      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
2930      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
2931      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageRows);
2932      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernel->width)),(void *)NULL);
2933      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
2934      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
2935      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&justBlur);
2936      if (clStatus != CL_SUCCESS)
2937      {
2938        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2939        goto cleanup;
2940      }
2941    }
2942
2943    /* launch the kernel */
2944    {
2945      size_t gsize[2];
2946      size_t wsize[2];
2947
2948      gsize[0] = ((image->columns + 7) / 8) * 8;
2949      gsize[1] = ((image->rows + 31) / 32) * 32;
2950      wsize[0] = 8;
2951      wsize[1] = 32;
2952
2953      clStatus = clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskKernel, 2, NULL, gsize, wsize, 0, NULL, NULL);
2954      if (clStatus != CL_SUCCESS)
2955      {
2956        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2957        goto cleanup;
2958      }
2959      clEnv->library->clFlush(queue);
2960    }
2961  }
2962
2963  /* get result */
2964  if (ALIGNED(filteredPixels,CLPixelPacket))
2965  {
2966    length = image->columns * image->rows;
2967    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2968  }
2969  else
2970  {
2971    length = image->columns * image->rows;
2972    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2973  }
2974  if (clStatus != CL_SUCCESS)
2975  {
2976    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
2977    goto cleanup;
2978  }
2979
2980  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2981
2982cleanup:
2983  OpenCLLogException(__FUNCTION__,__LINE__,exception);
2984
2985  image_view=DestroyCacheView(image_view);
2986  if (filteredImage_view != NULL)
2987    filteredImage_view=DestroyCacheView(filteredImage_view);
2988
2989  if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
2990  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
2991  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
2992  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
2993  if (unsharpMaskKernel!=NULL)                RelinquishOpenCLKernel(clEnv, unsharpMaskKernel);
2994  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
2995  if (outputReady == MagickFalse)
2996  {
2997    if (filteredImage != NULL)
2998    {
2999      DestroyImage(filteredImage);
3000      filteredImage = NULL;
3001    }
3002  }
3003  return(filteredImage);
3004}
3005
3006
3007MagickExport Image *AccelerateUnsharpMaskImage(const Image *image,
3008  const ChannelType channel,const double radius,const double sigma,
3009  const double gain,const double threshold,ExceptionInfo *exception)
3010{
3011  Image
3012    *filteredImage;
3013
3014  assert(image != NULL);
3015  assert(exception != (ExceptionInfo *) NULL);
3016
3017  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3018      (checkAccelerateCondition(image, channel) == MagickFalse))
3019    return NULL;
3020
3021  if (radius < 12.1)
3022    filteredImage = ComputeUnsharpMaskImageSingle(image,channel,radius,sigma,gain,threshold, 0, exception);
3023  else if (splitImage(image) && (image->rows / 2 > radius))
3024    filteredImage = ComputeUnsharpMaskImageSection(image,channel,radius,sigma,gain,threshold,exception);
3025  else
3026    filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
3027  return(filteredImage);
3028}
3029
3030/*
3031%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3032%                                                                             %
3033%                                                                             %
3034%                                                                             %
3035%   A c c e l e r a t e R e s i z e I m a g e                                 %
3036%                                                                             %
3037%                                                                             %
3038%                                                                             %
3039%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3040%
3041%  AccelerateResizeImage() is an OpenCL implementation of ResizeImage()
3042%
3043%  AccelerateResizeImage() scales an image to the desired dimensions, using the given
3044%  filter (see AcquireFilterInfo()).
3045%
%  If an undefined filter is given, the filter defaults to Mitchell for a
%  colormapped image, an image with a matte channel, or if the image is
%  enlarged.  Otherwise the filter defaults to Lanczos.
3049%
3050%  AccelerateResizeImage() was inspired by Paul Heckbert's "zoom" program.
3051%
3052%  The format of the AccelerateResizeImage method is:
3053%
3054%      Image *ResizeImage(Image *image,const size_t columns,
3055%        const size_t rows, const ResizeFilter* filter,
3056%        ExceptionInfo *exception)
3057%
3058%  A description of each parameter follows:
3059%
3060%    o image: the image.
3061%
3062%    o columns: the number of columns in the scaled image.
3063%
3064%    o rows: the number of rows in the scaled image.
3065%
3066%    o filter: Image filter to use.
3067%
3068%    o exception: return any errors or warnings in this structure.
3069%
3070*/
3071
3072static MagickBooleanType resizeHorizontalFilter(cl_mem image,
3073  const unsigned int imageColumns,const unsigned int imageRows,
3074  const unsigned int matte,cl_mem resizedImage,
3075  const unsigned int resizedColumns,const unsigned int resizedRows,
3076  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3077  const float xFactor,MagickCLEnv clEnv,cl_command_queue queue,
3078  ExceptionInfo *exception)
3079{
3080  cl_kernel
3081    horizontalKernel;
3082
3083  cl_int clStatus;
3084
3085  const unsigned int
3086    workgroupSize = 256;
3087
3088  float
3089    resizeFilterScale,
3090    resizeFilterSupport,
3091    resizeFilterWindowSupport,
3092    resizeFilterBlur,
3093    scale,
3094    support;
3095
3096  int
3097    cacheRangeStart,
3098    cacheRangeEnd,
3099    numCachedPixels,
3100    resizeFilterType,
3101    resizeWindowType;
3102
3103  MagickBooleanType
3104    status = MagickFalse;
3105
3106  size_t
3107    deviceLocalMemorySize,
3108    gammaAccumulatorLocalMemorySize,
3109    global_work_size[2],
3110    imageCacheLocalMemorySize,
3111    pixelAccumulatorLocalMemorySize,
3112    local_work_size[2],
3113    totalLocalMemorySize,
3114    weightAccumulatorLocalMemorySize;
3115
3116  unsigned int
3117    chunkSize,
3118    i,
3119    pixelPerWorkgroup;
3120
3121  horizontalKernel = NULL;
3122  status = MagickFalse;
3123
3124  /*
3125  Apply filter to resize vertically from image to resize image.
3126  */
3127  scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
3128  support=scale*GetResizeFilterSupport(resizeFilter);
3129  if (support < 0.5)
3130  {
3131    /*
3132    Support too small even for nearest neighbour: Reduce to point
3133    sampling.
3134    */
3135    support=(MagickRealType) 0.5;
3136    scale=1.0;
3137  }
3138  scale=PerceptibleReciprocal(scale);
3139
3140  if (resizedColumns < workgroupSize)
3141  {
3142    chunkSize = 32;
3143    pixelPerWorkgroup = 32;
3144  }
3145  else
3146  {
3147    chunkSize = workgroupSize;
3148    pixelPerWorkgroup = workgroupSize;
3149  }
3150
3151  /* get the local memory size supported by the device */
3152  deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
3153
3154DisableMSCWarning(4127)
3155  while(1)
3156RestoreMSCWarning
3157  {
3158    /* calculate the local memory size needed per workgroup */
3159    cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
3160    cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
3161    numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
3162    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
3163    totalLocalMemorySize = imageCacheLocalMemorySize;
3164
3165    /* local size for the pixel accumulator */
3166    pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
3167    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
3168
3169    /* local memory size for the weight accumulator */
3170    weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
3171    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
3172
3173    /* local memory size for the gamma accumulator */
3174    if (matte == 0)
3175      gammaAccumulatorLocalMemorySize = sizeof(float);
3176    else
3177      gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
3178    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
3179
3180    if (totalLocalMemorySize <= deviceLocalMemorySize)
3181      break;
3182    else
3183    {
3184      pixelPerWorkgroup = pixelPerWorkgroup/2;
3185      chunkSize = chunkSize/2;
3186      if (pixelPerWorkgroup == 0
3187          || chunkSize == 0)
3188      {
3189        /* quit, fallback to CPU */
3190        goto cleanup;
3191      }
3192    }
3193  }
3194
3195  resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
3196  resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
3197
3198
3199  if (resizeFilterType == SincFastWeightingFunction
3200    && resizeWindowType == SincFastWeightingFunction)
3201  {
3202    horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilterSinc");
3203  }
3204  else
3205  {
3206    horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
3207  }
3208  if (horizontalKernel == NULL)
3209  {
3210    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3211    goto cleanup;
3212  }
3213
3214  i = 0;
3215  clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&image);
3216  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
3217  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
3218  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
3219  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
3220  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
3221
3222  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
3223  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
3224
3225  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
3226  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
3227  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
3228
3229  resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
3230  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
3231
3232  resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
3233  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
3234
3235  resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
3236  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
3237
3238  resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
3239  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
3240
3241
3242  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
3243  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
3244  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
3245  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
3246
3247
3248  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
3249  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
3250  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
3251
3252  if (clStatus != CL_SUCCESS)
3253  {
3254    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3255    goto cleanup;
3256  }
3257
3258  global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
3259  global_work_size[1] = resizedRows;
3260
3261  local_work_size[0] = workgroupSize;
3262  local_work_size[1] = 1;
3263  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
3264  (void) local_work_size;
3265  if (clStatus != CL_SUCCESS)
3266  {
3267    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3268    goto cleanup;
3269  }
3270  clEnv->library->clFlush(queue);
3271  status = MagickTrue;
3272
3273
3274cleanup:
3275  OpenCLLogException(__FUNCTION__,__LINE__,exception);
3276
3277  if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
3278
3279  return(status);
3280}
3281
3282static MagickBooleanType resizeVerticalFilter(cl_mem image,
3283  const unsigned int imageColumns,const unsigned int imageRows,
3284  const unsigned int matte,cl_mem resizedImage,
3285  const unsigned int resizedColumns,const unsigned int resizedRows,
3286  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3287  const float yFactor,MagickCLEnv clEnv,cl_command_queue queue,
3288  ExceptionInfo *exception)
3289{
3290  cl_kernel
3291    horizontalKernel;
3292
3293  cl_int clStatus;
3294
3295  const unsigned int
3296    workgroupSize = 256;
3297
3298  float
3299    resizeFilterScale,
3300    resizeFilterSupport,
3301    resizeFilterWindowSupport,
3302    resizeFilterBlur,
3303    scale,
3304    support;
3305
3306  int
3307    cacheRangeStart,
3308    cacheRangeEnd,
3309    numCachedPixels,
3310    resizeFilterType,
3311    resizeWindowType;
3312
3313  MagickBooleanType
3314    status = MagickFalse;
3315
3316  size_t
3317    deviceLocalMemorySize,
3318    gammaAccumulatorLocalMemorySize,
3319    global_work_size[2],
3320    imageCacheLocalMemorySize,
3321    pixelAccumulatorLocalMemorySize,
3322    local_work_size[2],
3323    totalLocalMemorySize,
3324    weightAccumulatorLocalMemorySize;
3325
3326  unsigned int
3327    chunkSize,
3328    i,
3329    pixelPerWorkgroup;
3330
3331  horizontalKernel = NULL;
3332  status = MagickFalse;
3333
3334  /*
3335  Apply filter to resize vertically from image to resize image.
3336  */
3337  scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
3338  support=scale*GetResizeFilterSupport(resizeFilter);
3339  if (support < 0.5)
3340  {
3341    /*
3342    Support too small even for nearest neighbour: Reduce to point
3343    sampling.
3344    */
3345    support=(MagickRealType) 0.5;
3346    scale=1.0;
3347  }
3348  scale=PerceptibleReciprocal(scale);
3349
3350  if (resizedRows < workgroupSize)
3351  {
3352    chunkSize = 32;
3353    pixelPerWorkgroup = 32;
3354  }
3355  else
3356  {
3357    chunkSize = workgroupSize;
3358    pixelPerWorkgroup = workgroupSize;
3359  }
3360
3361  /* get the local memory size supported by the device */
3362  deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
3363
3364DisableMSCWarning(4127)
3365  while(1)
3366RestoreMSCWarning
3367  {
3368    /* calculate the local memory size needed per workgroup */
3369    cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
3370    cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
3371    numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
3372    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
3373    totalLocalMemorySize = imageCacheLocalMemorySize;
3374
3375    /* local size for the pixel accumulator */
3376    pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
3377    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
3378
3379    /* local memory size for the weight accumulator */
3380    weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
3381    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
3382
3383    /* local memory size for the gamma accumulator */
3384    if (matte == 0)
3385      gammaAccumulatorLocalMemorySize = sizeof(float);
3386    else
3387      gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
3388    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
3389
3390    if (totalLocalMemorySize <= deviceLocalMemorySize)
3391      break;
3392    else
3393    {
3394      pixelPerWorkgroup = pixelPerWorkgroup/2;
3395      chunkSize = chunkSize/2;
3396      if (pixelPerWorkgroup == 0
3397          || chunkSize == 0)
3398      {
3399        /* quit, fallback to CPU */
3400        goto cleanup;
3401      }
3402    }
3403  }
3404
3405  resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
3406  resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
3407
3408  if (resizeFilterType == SincFastWeightingFunction
3409    && resizeWindowType == SincFastWeightingFunction)
3410    horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilterSinc");
3411  else
3412    horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
3413
3414  if (horizontalKernel == NULL)
3415  {
3416    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3417    goto cleanup;
3418  }
3419
3420  i = 0;
3421  clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&image);
3422  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
3423  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
3424  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
3425  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&yFactor);
3426  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
3427
3428  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
3429  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
3430
3431  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
3432  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
3433  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
3434
3435  resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
3436  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
3437
3438  resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
3439  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
3440
3441  resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
3442  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
3443
3444  resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
3445  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
3446
3447
3448  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
3449  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
3450  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
3451  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
3452
3453
3454  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
3455  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
3456  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
3457
3458  if (clStatus != CL_SUCCESS)
3459  {
3460    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3461    goto cleanup;
3462  }
3463
3464  global_work_size[0] = resizedColumns;
3465  global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
3466
3467  local_work_size[0] = 1;
3468  local_work_size[1] = workgroupSize;
3469  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, NULL);
3470  if (clStatus != CL_SUCCESS)
3471  {
3472    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3473    goto cleanup;
3474  }
3475  clEnv->library->clFlush(queue);
3476  status = MagickTrue;
3477
3478
3479cleanup:
3480  OpenCLLogException(__FUNCTION__,__LINE__,exception);
3481
3482  if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
3483
3484  return(status);
3485}
3486
3487static Image *ComputeResizeImage(const Image* image,
3488  const size_t resizedColumns,const size_t resizedRows,
3489  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
3490{
3491  CacheView
3492    *filteredImage_view,
3493    *image_view;
3494
3495  cl_command_queue
3496    queue;
3497
3498  cl_int
3499    clStatus;
3500
3501  cl_context
3502    context;
3503
3504  cl_mem
3505    cubicCoefficientsBuffer,
3506    filteredImageBuffer,
3507    imageBuffer,
3508    tempImageBuffer;
3509
3510  cl_mem_flags
3511    mem_flags;
3512
3513  const double
3514    *resizeFilterCoefficient;
3515
3516  const void
3517    *inputPixels;
3518
3519  float
3520    *mappedCoefficientBuffer,
3521    xFactor,
3522    yFactor;
3523
3524  MagickBooleanType
3525    outputReady,
3526    status;
3527
3528  MagickCLEnv
3529    clEnv;
3530
3531  MagickSizeType
3532    length;
3533
3534  Image
3535    *filteredImage;
3536
3537  unsigned int
3538    i;
3539
3540  void
3541    *filteredPixels,
3542    *hostPtr;
3543
3544  outputReady = MagickFalse;
3545  filteredImage = NULL;
3546  filteredImage_view = NULL;
3547  clEnv = NULL;
3548  context = NULL;
3549  imageBuffer = NULL;
3550  tempImageBuffer = NULL;
3551  filteredImageBuffer = NULL;
3552  cubicCoefficientsBuffer = NULL;
3553  queue = NULL;
3554
3555  clEnv = GetDefaultOpenCLEnv();
3556  context = GetOpenCLContext(clEnv);
3557
3558  /* Create and initialize OpenCL buffers. */
3559  image_view=AcquireVirtualCacheView(image,exception);
3560  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
3561  if (inputPixels == (const void *) NULL)
3562  {
3563    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3564    goto cleanup;
3565  }
3566
3567  /* If the host pointer is aligned to the size of CLPixelPacket,
3568     then use the host buffer directly from the GPU; otherwise,
3569     create a buffer on the GPU and copy the data over */
3570  if (ALIGNED(inputPixels,CLPixelPacket))
3571  {
3572    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3573  }
3574  else
3575  {
3576    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3577  }
3578  /* create a CL buffer from image pixel buffer */
3579  length = image->columns * image->rows;
3580  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3581  if (clStatus != CL_SUCCESS)
3582  {
3583    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3584    goto cleanup;
3585  }
3586
3587  cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
3588  if (clStatus != CL_SUCCESS)
3589  {
3590    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3591    goto cleanup;
3592  }
3593  queue = AcquireOpenCLCommandQueue(clEnv);
3594  mappedCoefficientBuffer = (float*)clEnv->library->clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
3595          , 0, NULL, NULL, &clStatus);
3596  if (clStatus != CL_SUCCESS)
3597  {
3598    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
3599    goto cleanup;
3600  }
3601  resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
3602  for (i = 0; i < 7; i++)
3603  {
3604    mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
3605  }
3606  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
3607  if (clStatus != CL_SUCCESS)
3608  {
3609    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
3610    goto cleanup;
3611  }
3612
3613  filteredImage = CloneImage(image,resizedColumns,resizedRows,MagickTrue,exception);
3614  if (filteredImage == NULL)
3615    goto cleanup;
3616
3617  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3618  {
3619    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
3620    goto cleanup;
3621  }
3622  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3623  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3624  if (filteredPixels == (void *) NULL)
3625  {
3626    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3627    goto cleanup;
3628  }
3629
3630  if (ALIGNED(filteredPixels,CLPixelPacket))
3631  {
3632    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3633    hostPtr = filteredPixels;
3634  }
3635  else
3636  {
3637    mem_flags = CL_MEM_WRITE_ONLY;
3638    hostPtr = NULL;
3639  }
3640
3641  /* create a CL buffer from image pixel buffer */
3642  length = filteredImage->columns * filteredImage->rows;
3643  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3644  if (clStatus != CL_SUCCESS)
3645  {
3646    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3647    goto cleanup;
3648  }
3649
3650  xFactor=(float) resizedColumns/(float) image->columns;
3651  yFactor=(float) resizedRows/(float) image->rows;
3652  if (xFactor > yFactor)
3653  {
3654
3655    length = resizedColumns*image->rows;
3656    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
3657    if (clStatus != CL_SUCCESS)
3658    {
3659      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3660      goto cleanup;
3661    }
3662
3663    status = resizeHorizontalFilter(imageBuffer, image->columns, image->rows, (image->alpha_trait==BlendPixelTrait)?1:0
3664          , tempImageBuffer, resizedColumns, image->rows
3665          , resizeFilter, cubicCoefficientsBuffer
3666          , xFactor, clEnv, queue, exception);
3667    if (status != MagickTrue)
3668      goto cleanup;
3669
3670    status = resizeVerticalFilter(tempImageBuffer, resizedColumns, image->rows, (image->alpha_trait==BlendPixelTrait)?1:0
3671       , filteredImageBuffer, resizedColumns, resizedRows
3672       , resizeFilter, cubicCoefficientsBuffer
3673       , yFactor, clEnv, queue, exception);
3674    if (status != MagickTrue)
3675      goto cleanup;
3676  }
3677  else
3678  {
3679    length = image->columns*resizedRows;
3680    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
3681    if (clStatus != CL_SUCCESS)
3682    {
3683      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3684      goto cleanup;
3685    }
3686
3687    status = resizeVerticalFilter(imageBuffer, image->columns, image->rows, (image->alpha_trait==BlendPixelTrait)?1:0
3688       , tempImageBuffer, image->columns, resizedRows
3689       , resizeFilter, cubicCoefficientsBuffer
3690       , yFactor, clEnv, queue, exception);
3691    if (status != MagickTrue)
3692      goto cleanup;
3693
3694    status = resizeHorizontalFilter(tempImageBuffer, image->columns, resizedRows, (image->alpha_trait==BlendPixelTrait)?1:0
3695       , filteredImageBuffer, resizedColumns, resizedRows
3696       , resizeFilter, cubicCoefficientsBuffer
3697       , xFactor, clEnv, queue, exception);
3698    if (status != MagickTrue)
3699      goto cleanup;
3700  }
3701  length = resizedColumns*resizedRows;
3702  if (ALIGNED(filteredPixels,CLPixelPacket))
3703  {
3704    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3705  }
3706  else
3707  {
3708    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3709  }
3710  if (clStatus != CL_SUCCESS)
3711  {
3712    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
3713    goto cleanup;
3714  }
3715  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3716
3717cleanup:
3718  OpenCLLogException(__FUNCTION__,__LINE__,exception);
3719
3720  image_view=DestroyCacheView(image_view);
3721  if (filteredImage_view != NULL)
3722    filteredImage_view=DestroyCacheView(filteredImage_view);
3723
3724  if (imageBuffer!=NULL)		  clEnv->library->clReleaseMemObject(imageBuffer);
3725  if (tempImageBuffer!=NULL)		  clEnv->library->clReleaseMemObject(tempImageBuffer);
3726  if (filteredImageBuffer!=NULL)	  clEnv->library->clReleaseMemObject(filteredImageBuffer);
3727  if (cubicCoefficientsBuffer!=NULL)      clEnv->library->clReleaseMemObject(cubicCoefficientsBuffer);
3728  if (queue != NULL)  	                  RelinquishOpenCLCommandQueue(clEnv, queue);
3729  if (outputReady == MagickFalse && filteredImage != NULL)
3730    filteredImage=DestroyImage(filteredImage);
3731  return(filteredImage);
3732}
3733
3734const ResizeWeightingFunctionType supportedResizeWeighting[] =
3735{
3736  BoxWeightingFunction,
3737  TriangleWeightingFunction,
3738  HanningWeightingFunction,
3739  HammingWeightingFunction,
3740  BlackmanWeightingFunction,
3741  CubicBCWeightingFunction,
3742  SincWeightingFunction,
3743  SincFastWeightingFunction,
3744  LastWeightingFunction
3745};
3746
3747static MagickBooleanType gpuSupportedResizeWeighting(
3748  ResizeWeightingFunctionType f)
3749{
3750  unsigned int
3751    i;
3752
3753  for (i = 0; ;i++)
3754  {
3755    if (supportedResizeWeighting[i] == LastWeightingFunction)
3756      break;
3757    if (supportedResizeWeighting[i] == f)
3758      return(MagickTrue);
3759  }
3760  return(MagickFalse);
3761}
3762
3763MagickExport Image *AccelerateResizeImage(const Image *image,
3764  const size_t resizedColumns,const size_t resizedRows,
3765  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
3766{
3767  Image
3768    *filteredImage;
3769
3770  assert(image != NULL);
3771  assert(exception != (ExceptionInfo *) NULL);
3772
3773  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3774      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
3775    return NULL;
3776
3777  if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse ||
3778      gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
3779    return NULL;
3780
3781  filteredImage=ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
3782  return(filteredImage);
3783}
3784
3785/*
3786%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3787%                                                                             %
3788%                                                                             %
3789%                                                                             %
3790%     C o n t r a s t I m a g e  w i t h  O p e n C L                         %
3791%                                                                             %
3792%                                                                             %
3793%                                                                             %
3794%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3795%
%  ContrastImage() enhances the intensity differences between the lighter and
%  darker elements of the image.  Set sharpen to MagickTrue to increase the
%  image contrast; otherwise the contrast is reduced.
3799%
3800%  The format of the ContrastImage method is:
3801%
3802%      MagickBooleanType ContrastImage(Image *image,
3803%        const MagickBooleanType sharpen)
3804%
3805%  A description of each parameter follows:
3806%
3807%    o image: the image.
3808%
3809%    o sharpen: Increase or decrease image contrast.
3810%
3811*/
3812
3813static MagickBooleanType ComputeContrastImage(Image *image,
3814  const MagickBooleanType sharpen,ExceptionInfo *exception)
3815{
3816  CacheView
3817    *image_view;
3818
3819  cl_command_queue
3820    queue;
3821
3822  cl_context
3823    context;
3824
3825  cl_int
3826    clStatus;
3827
3828  cl_kernel
3829    filterKernel;
3830
3831  cl_mem
3832    imageBuffer;
3833
3834  cl_mem_flags
3835    mem_flags;
3836
3837  MagickBooleanType
3838    outputReady;
3839
3840  MagickCLEnv
3841    clEnv;
3842
3843  MagickSizeType
3844    length;
3845
3846  size_t
3847    global_work_size[2];
3848
3849  unsigned int
3850    i,
3851    uSharpen;
3852
3853  void
3854    *inputPixels;
3855
3856  outputReady = MagickFalse;
3857  clEnv = NULL;
3858  inputPixels = NULL;
3859  context = NULL;
3860  imageBuffer = NULL;
3861  filterKernel = NULL;
3862  queue = NULL;
3863
3864  clEnv = GetDefaultOpenCLEnv();
3865  context = GetOpenCLContext(clEnv);
3866
3867  /* Create and initialize OpenCL buffers. */
3868  image_view=AcquireAuthenticCacheView(image,exception);
3869  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3870  if (inputPixels == (void *) NULL)
3871  {
3872    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3873    goto cleanup;
3874  }
3875
3876  /* If the host pointer is aligned to the size of CLPixelPacket,
3877     then use the host buffer directly from the GPU; otherwise,
3878     create a buffer on the GPU and copy the data over */
3879  if (ALIGNED(inputPixels,CLPixelPacket))
3880  {
3881    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3882  }
3883  else
3884  {
3885    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3886  }
3887  /* create a CL buffer from image pixel buffer */
3888  length = image->columns * image->rows;
3889  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3890  if (clStatus != CL_SUCCESS)
3891  {
3892    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3893    goto cleanup;
3894  }
3895
3896  filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
3897  if (filterKernel == NULL)
3898  {
3899    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3900    goto cleanup;
3901  }
3902
3903  i = 0;
3904  clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3905
3906  uSharpen = (sharpen == MagickFalse)?0:1;
3907  clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
3908  if (clStatus != CL_SUCCESS)
3909  {
3910    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3911    goto cleanup;
3912  }
3913
3914  global_work_size[0] = image->columns;
3915  global_work_size[1] = image->rows;
3916  /* launch the kernel */
3917  queue = AcquireOpenCLCommandQueue(clEnv);
3918  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
3919  if (clStatus != CL_SUCCESS)
3920  {
3921    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3922    goto cleanup;
3923  }
3924  clEnv->library->clFlush(queue);
3925
3926  if (ALIGNED(inputPixels,CLPixelPacket))
3927  {
3928    length = image->columns * image->rows;
3929    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3930  }
3931  else
3932  {
3933    length = image->columns * image->rows;
3934    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3935  }
3936  if (clStatus != CL_SUCCESS)
3937  {
3938    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
3939    goto cleanup;
3940  }
3941  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3942
3943cleanup:
3944  OpenCLLogException(__FUNCTION__,__LINE__,exception);
3945
3946  image_view=DestroyCacheView(image_view);
3947
3948  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
3949  if (filterKernel!=NULL)                     RelinquishOpenCLKernel(clEnv, filterKernel);
3950  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
3951  return(outputReady);
3952}
3953
3954MagickExport MagickBooleanType AccelerateContrastImage(Image *image,
3955  const MagickBooleanType sharpen,ExceptionInfo *exception)
3956{
3957  MagickBooleanType
3958    status;
3959
3960  assert(image != NULL);
3961  assert(exception != (ExceptionInfo *) NULL);
3962
3963  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3964      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
3965    return(MagickFalse);
3966
3967  status = ComputeContrastImage(image,sharpen,exception);
3968  return(status);
3969}
3970
3971/*
3972%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3973%                                                                             %
3974%                                                                             %
3975%                                                                             %
3976%     M o d u l a t e I m a g e  w i t h  O p e n C L                         %
3977%                                                                             %
3978%                                                                             %
3979%                                                                             %
3980%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3981%
3982%  ModulateImage() lets you control the brightness, saturation, and hue
3983%  of an image.  Modulate represents the brightness, saturation, and hue
3984%  as one parameter (e.g. 90,150,100).  If the image colorspace is HSL, the
3985%  modulation is lightness, saturation, and hue.  For HWB, use blackness,
%  whiteness, and hue. And for HCL, use chroma, luma, and hue.
3987%
3988%  The format of the ModulateImage method is:
3989%
3990%      MagickBooleanType ModulateImage(Image *image,const char *modulate)
3991%
3992%  A description of each parameter follows:
3993%
3994%    o image: the image.
3995%
3996%    o percent_*: Define the percent change in brightness, saturation, and
3997%      hue.
3998%
3999*/
4000
4001MagickBooleanType ComputeModulateImage(Image *image,
4002  double percent_brightness,double percent_hue,double percent_saturation,
4003  ColorspaceType colorspace,ExceptionInfo *exception)
4004{
4005  CacheView
4006    *image_view;
4007
4008  cl_float
4009    bright,
4010    hue,
4011    saturation;
4012
4013  cl_context
4014    context;
4015
4016  cl_command_queue
4017    queue;
4018
4019  cl_int
4020    color,
4021    clStatus;
4022
4023  cl_kernel
4024    modulateKernel;
4025
4026  cl_mem
4027    imageBuffer;
4028
4029  cl_mem_flags
4030    mem_flags;
4031
4032  MagickBooleanType
4033    outputReady;
4034
4035  MagickCLEnv
4036    clEnv;
4037
4038  MagickSizeType
4039    length;
4040
4041  register ssize_t
4042    i;
4043
4044  void
4045    *inputPixels;
4046
4047  inputPixels = NULL;
4048  imageBuffer = NULL;
4049  modulateKernel = NULL;
4050
4051  assert(image != (Image *) NULL);
4052  assert(image->signature == MagickSignature);
4053  if (image->debug != MagickFalse)
4054    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4055
4056  /*
4057   * initialize opencl env
4058   */
4059  clEnv = GetDefaultOpenCLEnv();
4060  context = GetOpenCLContext(clEnv);
4061  queue = AcquireOpenCLCommandQueue(clEnv);
4062
4063  outputReady = MagickFalse;
4064
4065  /* Create and initialize OpenCL buffers.
4066   inputPixels = AcquirePixelCachePixels(image, &length, exception);
4067   assume this  will get a writable image
4068   */
4069  image_view=AcquireAuthenticCacheView(image,exception);
4070  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
4071  if (inputPixels == (void *) NULL)
4072  {
4073    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
4074    goto cleanup;
4075  }
4076
4077  /* If the host pointer is aligned to the size of CLPixelPacket,
4078   then use the host buffer directly from the GPU; otherwise,
4079   create a buffer on the GPU and copy the data over
4080   */
4081  if (ALIGNED(inputPixels,CLPixelPacket))
4082  {
4083    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
4084  }
4085  else
4086  {
4087    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
4088  }
4089  /* create a CL buffer from image pixel buffer */
4090  length = image->columns * image->rows;
4091  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4092  if (clStatus != CL_SUCCESS)
4093  {
4094    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4095    goto cleanup;
4096  }
4097
4098  modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
4099  if (modulateKernel == NULL)
4100  {
4101    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4102    goto cleanup;
4103  }
4104
4105  bright=percent_brightness;
4106  hue=percent_hue;
4107  saturation=percent_saturation;
4108  color=colorspace;
4109
4110  i = 0;
4111  clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4112  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
4113  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
4114  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
4115  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
4116  if (clStatus != CL_SUCCESS)
4117  {
4118    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4119    printf("no kernel\n");
4120    goto cleanup;
4121  }
4122
4123  {
4124    size_t global_work_size[2];
4125    global_work_size[0] = image->columns;
4126    global_work_size[1] = image->rows;
4127    /* launch the kernel */
4128    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4129    if (clStatus != CL_SUCCESS)
4130    {
4131      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4132      goto cleanup;
4133    }
4134    clEnv->library->clFlush(queue);
4135  }
4136
4137  if (ALIGNED(inputPixels,CLPixelPacket))
4138  {
4139    length = image->columns * image->rows;
4140    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4141  }
4142  else
4143  {
4144    length = image->columns * image->rows;
4145    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
4146  }
4147  if (clStatus != CL_SUCCESS)
4148  {
4149    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
4150    goto cleanup;
4151  }
4152
4153  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
4154
4155cleanup:
4156  OpenCLLogException(__FUNCTION__,__LINE__,exception);
4157
4158  image_view=DestroyCacheView(image_view);
4159
4160  if (imageBuffer!=NULL)
4161    clEnv->library->clReleaseMemObject(imageBuffer);
4162  if (modulateKernel!=NULL)
4163    RelinquishOpenCLKernel(clEnv, modulateKernel);
4164  if (queue != NULL)
4165    RelinquishOpenCLCommandQueue(clEnv, queue);
4166
4167  return outputReady;
4168
4169}
4170
4171MagickExport MagickBooleanType AccelerateModulateImage(Image *image,
4172  double percent_brightness,double percent_hue,double percent_saturation,
4173  ColorspaceType colorspace,ExceptionInfo *exception)
4174{
4175  MagickBooleanType
4176    status;
4177
4178  assert(image != NULL);
4179  assert(exception != (ExceptionInfo *) NULL);
4180
4181  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
4182      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
4183    return(MagickFalse);
4184
4185  if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
4186    return(MagickFalse);
4187
4188  status = ComputeModulateImage(image,percent_brightness, percent_hue, percent_saturation, colorspace, exception);
4189  return(status);
4190}
4191
4192/*
4193%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4194%                                                                             %
4195%                                                                             %
4196%                                                                             %
4197%     N e g a t e I m a g e  w i t h  O p e n C L                             %
4198%                                                                             %
4199%                                                                             %
4200%                                                                             %
4201%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4202%
4203%
4204%  A description of each parameter follows:
4205%
4206%    o image: the image.
4207%
4208%    o channel: the channel.
4209%
4210%    o grayscale: If MagickTrue, only negate grayscale pixels within the image.
4211%
4212*/
4213
4214MagickBooleanType ComputeNegateImageChannel(Image *image,
4215  const ChannelType channel,const MagickBooleanType magick_unused(grayscale),
4216  ExceptionInfo* exception)
4217{
4218  CacheView
4219    *image_view;
4220
4221  cl_context
4222    context;
4223
4224  cl_command_queue
4225    queue;
4226
4227  cl_int
4228    clStatus;
4229
4230  cl_kernel
4231    negateKernel;
4232
4233  cl_mem
4234    imageBuffer;
4235
4236  cl_mem_flags
4237    mem_flags;
4238
4239  MagickBooleanType
4240    outputReady;
4241
4242  MagickCLEnv
4243    clEnv;
4244
4245  MagickSizeType
4246    length;
4247
4248  register ssize_t
4249    i;
4250
4251  void
4252    *inputPixels;
4253
4254  magick_unreferenced(grayscale);
4255
4256  inputPixels = NULL;
4257  imageBuffer = NULL;
4258  negateKernel = NULL;
4259
4260  assert(image != (Image *) NULL);
4261  assert(image->signature == MagickSignature);
4262  if (image->debug != MagickFalse)
4263    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4264
4265  /*
4266   * initialize opencl env
4267   */
4268  clEnv = GetDefaultOpenCLEnv();
4269  context = GetOpenCLContext(clEnv);
4270  queue = AcquireOpenCLCommandQueue(clEnv);
4271
4272  outputReady = MagickFalse;
4273
4274  /* Create and initialize OpenCL buffers.
4275   inputPixels = AcquirePixelCachePixels(image, &length, exception);
4276   assume this  will get a writable image
4277   */
4278  image_view=AcquireAuthenticCacheView(image,exception);
4279  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
4280  if (inputPixels == (void *) NULL)
4281  {
4282    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
4283    goto cleanup;
4284  }
4285
4286  /* If the host pointer is aligned to the size of CLPixelPacket,
4287   then use the host buffer directly from the GPU; otherwise,
4288   create a buffer on the GPU and copy the data over
4289   */
4290  if (ALIGNED(inputPixels,CLPixelPacket))
4291  {
4292    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
4293  }
4294  else
4295  {
4296    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
4297  }
4298  /* create a CL buffer from image pixel buffer */
4299  length = image->columns * image->rows;
4300  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4301  if (clStatus != CL_SUCCESS)
4302  {
4303    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4304    goto cleanup;
4305  }
4306
4307  negateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Negate");
4308  if (negateKernel == NULL)
4309  {
4310    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4311    goto cleanup;
4312  }
4313
4314  i = 0;
4315  clStatus=clEnv->library->clSetKernelArg(negateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4316  clStatus=clEnv->library->clSetKernelArg(negateKernel,i++,sizeof(ChannelType),(void *)&channel);
4317  if (clStatus != CL_SUCCESS)
4318  {
4319    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4320    printf("no kernel\n");
4321    goto cleanup;
4322  }
4323
4324  {
4325    size_t global_work_size[2];
4326    global_work_size[0] = image->columns;
4327    global_work_size[1] = image->rows;
4328    /* launch the kernel */
4329    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, negateKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4330    if (clStatus != CL_SUCCESS)
4331    {
4332      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4333      goto cleanup;
4334    }
4335    clEnv->library->clFlush(queue);
4336  }
4337
4338  if (ALIGNED(inputPixels,CLPixelPacket))
4339  {
4340    length = image->columns * image->rows;
4341    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4342  }
4343  else
4344  {
4345    length = image->columns * image->rows;
4346    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
4347  }
4348  if (clStatus != CL_SUCCESS)
4349  {
4350    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
4351    goto cleanup;
4352  }
4353
4354  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
4355
4356cleanup:
4357  OpenCLLogException(__FUNCTION__,__LINE__,exception);
4358
4359  image_view=DestroyCacheView(image_view);
4360
4361  if (imageBuffer!=NULL)
4362    clEnv->library->clReleaseMemObject(imageBuffer);
4363  if (negateKernel!=NULL)
4364    RelinquishOpenCLKernel(clEnv, negateKernel);
4365  if (queue != NULL)
4366    RelinquishOpenCLCommandQueue(clEnv, queue);
4367
4368  return(outputReady);
4369}
4370
4371MagickExport MagickBooleanType AccelerateNegateImageChannel(Image *image,
4372  const ChannelType channel,const MagickBooleanType grayscale,
4373  ExceptionInfo* exception)
4374{
4375  MagickBooleanType
4376    status;
4377
4378  assert(image != NULL);
4379  assert(exception != (ExceptionInfo *) NULL);
4380
4381  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
4382      (checkAccelerateCondition(image, channel) == MagickFalse))
4383    return(MagickFalse);
4384
4385  status=ComputeNegateImageChannel(image,channel,grayscale,exception);
4386  return(status);
4387}
4388
4389/*
4390%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4391%                                                                             %
4392%                                                                             %
4393%                                                                             %
4394%     G r a y s c a l e I m a g e  w i t h  O p e n C L                       %
4395%                                                                             %
4396%                                                                             %
4397%                                                                             %
4398%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4399%
4400%  GrayscaleImage() converts the colors in the reference image to gray.
4401%
4402%  The format of the GrayscaleImage method is:
4403%
4404%      MagickBooleanType GrayscaleImage(Image *image,
4405%        const PixelIntensityMethod method)
4406%
4407%  A description of each parameter follows:
4408%
4409%    o image: the image.
4410%
4411%    o method: the pixel intensity method.
4412%
4413*/
4414
4415MagickBooleanType ComputeGrayscaleImage(Image *image,
4416  const PixelIntensityMethod method,ExceptionInfo *exception)
4417{
4418  CacheView
4419    *image_view;
4420
4421  cl_command_queue
4422    queue;
4423
4424  cl_context
4425    context;
4426
4427  cl_int
4428    clStatus,
4429    intensityMethod;
4430
4431  cl_int
4432    colorspace;
4433
4434  cl_kernel
4435    grayscaleKernel;
4436
4437  cl_mem
4438    imageBuffer;
4439
4440  cl_mem_flags
4441    mem_flags;
4442
4443  MagickBooleanType
4444    outputReady;
4445
4446  MagickCLEnv
4447    clEnv;
4448
4449  MagickSizeType
4450    length;
4451
4452  register ssize_t
4453    i;
4454
4455  void
4456    *inputPixels;
4457
4458  inputPixels = NULL;
4459  imageBuffer = NULL;
4460  grayscaleKernel = NULL;
4461
4462  assert(image != (Image *) NULL);
4463  assert(image->signature == MagickSignature);
4464  if (image->debug != MagickFalse)
4465    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4466
4467  /*
4468   * initialize opencl env
4469   */
4470  clEnv = GetDefaultOpenCLEnv();
4471  context = GetOpenCLContext(clEnv);
4472  queue = AcquireOpenCLCommandQueue(clEnv);
4473
4474  outputReady = MagickFalse;
4475
4476  /* Create and initialize OpenCL buffers.
4477   inputPixels = AcquirePixelCachePixels(image, &length, exception);
4478   assume this  will get a writable image
4479   */
4480  image_view=AcquireAuthenticCacheView(image,exception);
4481  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
4482  if (inputPixels == (void *) NULL)
4483  {
4484    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
4485    goto cleanup;
4486  }
4487
4488  /* If the host pointer is aligned to the size of CLPixelPacket,
4489   then use the host buffer directly from the GPU; otherwise,
4490   create a buffer on the GPU and copy the data over
4491   */
4492  if (ALIGNED(inputPixels,CLPixelPacket))
4493  {
4494    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
4495  }
4496  else
4497  {
4498    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
4499  }
4500  /* create a CL buffer from image pixel buffer */
4501  length = image->columns * image->rows;
4502  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4503  if (clStatus != CL_SUCCESS)
4504  {
4505    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4506    goto cleanup;
4507  }
4508
4509  intensityMethod = method;
4510  colorspace = image->colorspace;
4511
4512  grayscaleKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Grayscale");
4513  if (grayscaleKernel == NULL)
4514  {
4515    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4516    goto cleanup;
4517  }
4518
4519  i = 0;
4520  clStatus=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4521  clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&intensityMethod);
4522  clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&colorspace);
4523  if (clStatus != CL_SUCCESS)
4524  {
4525    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4526    printf("no kernel\n");
4527    goto cleanup;
4528  }
4529
4530  {
4531    size_t global_work_size[2];
4532    global_work_size[0] = image->columns;
4533    global_work_size[1] = image->rows;
4534    /* launch the kernel */
4535    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, grayscaleKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4536    if (clStatus != CL_SUCCESS)
4537    {
4538      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4539      goto cleanup;
4540    }
4541    clEnv->library->clFlush(queue);
4542  }
4543
4544  if (ALIGNED(inputPixels,CLPixelPacket))
4545  {
4546    length = image->columns * image->rows;
4547    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4548  }
4549  else
4550  {
4551    length = image->columns * image->rows;
4552    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
4553  }
4554  if (clStatus != CL_SUCCESS)
4555  {
4556    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
4557    goto cleanup;
4558  }
4559
4560  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
4561
4562cleanup:
4563  OpenCLLogException(__FUNCTION__,__LINE__,exception);
4564
4565  image_view=DestroyCacheView(image_view);
4566
4567  if (imageBuffer!=NULL)
4568    clEnv->library->clReleaseMemObject(imageBuffer);
4569  if (grayscaleKernel!=NULL)
4570    RelinquishOpenCLKernel(clEnv, grayscaleKernel);
4571  if (queue != NULL)
4572    RelinquishOpenCLCommandQueue(clEnv, queue);
4573
4574  return( outputReady);
4575}
4576
4577MagickExport MagickBooleanType AccelerateGrayscaleImage(Image* image,
4578  const PixelIntensityMethod method,ExceptionInfo *exception)
4579{
4580  MagickBooleanType
4581    status;
4582
4583  assert(image != NULL);
4584  assert(exception != (ExceptionInfo *) NULL);
4585
4586  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
4587      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
4588    return(MagickFalse);
4589
4590  if (method == Rec601LuminancePixelIntensityMethod || method == Rec709LuminancePixelIntensityMethod)
4591    return(MagickFalse);
4592
4593  if (image->colorspace != sRGBColorspace)
4594    return(MagickFalse);
4595
4596  status=ComputeGrayscaleImage(image,method,exception);
4597  return(status);
4598}
4599
4600/*
4601%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4602%                                                                             %
4603%                                                                             %
4604%                                                                             %
4605%     E q u a l i z e I m a g e  w i t h  O p e n C L                         %
4606%                                                                             %
4607%                                                                             %
4608%                                                                             %
4609%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4610%
4611%  EqualizeImage() applies a histogram equalization to the image.
4612%
4613%  The format of the EqualizeImage method is:
4614%
4615%      MagickBooleanType EqualizeImage(Image *image)
4616%      MagickBooleanType EqualizeImageChannel(Image *image,
4617%        const ChannelType channel)
4618%
4619%  A description of each parameter follows:
4620%
4621%    o image: the image.
4622%
4623%    o channel: the channel.
4624%
4625*/
4626
4627static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
4628  cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,
4629  Image *image,const ChannelType channel,ExceptionInfo *exception)
4630{
4631  MagickBooleanType
4632    outputReady;
4633
4634  cl_int
4635    clStatus,
4636    colorspace,
4637    method;
4638
4639  cl_kernel
4640    histogramKernel;
4641
4642  register ssize_t
4643    i;
4644
4645  size_t
4646    global_work_size[2];
4647
4648  histogramKernel = NULL;
4649
4650  outputReady = MagickFalse;
4651  method = image->intensity;
4652  colorspace = image->colorspace;
4653
4654  /* get the OpenCL kernel */
4655  histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
4656  if (histogramKernel == NULL)
4657  {
4658    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4659    goto cleanup;
4660  }
4661
4662  /* set the kernel arguments */
4663  i = 0;
4664  clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4665  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
4666  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&method);
4667  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
4668  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
4669  if (clStatus != CL_SUCCESS)
4670  {
4671    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4672    goto cleanup;
4673  }
4674
4675  /* launch the kernel */
4676  global_work_size[0] = image->columns;
4677  global_work_size[1] = image->rows;
4678
4679  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
4680
4681  if (clStatus != CL_SUCCESS)
4682  {
4683    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4684    goto cleanup;
4685  }
4686  clEnv->library->clFlush(queue);
4687
4688  outputReady = MagickTrue;
4689
4690cleanup:
4691  OpenCLLogException(__FUNCTION__,__LINE__,exception);
4692
4693  if (histogramKernel!=NULL)
4694    RelinquishOpenCLKernel(clEnv, histogramKernel);
4695
4696  return(outputReady);
4697}
4698
4699MagickExport MagickBooleanType ComputeEqualizeImage(Image *image,
4700  const ChannelType channel,ExceptionInfo *exception)
4701{
4702#define EqualizeImageTag  "Equalize/Image"
4703
4704  CacheView
4705    *image_view;
4706
4707  cl_command_queue
4708    queue;
4709
4710  cl_context
4711    context;
4712
4713  cl_int
4714    clStatus;
4715
4716  cl_mem_flags
4717    mem_flags;
4718
4719  cl_mem
4720    equalizeMapBuffer,
4721    histogramBuffer,
4722    imageBuffer;
4723
4724  cl_kernel
4725    equalizeKernel,
4726    histogramKernel;
4727
4728  cl_uint4
4729    *histogram;
4730
4731  FloatPixelPacket
4732    white,
4733    black,
4734    intensity,
4735    *map;
4736
4737  MagickBooleanType
4738    outputReady,
4739    status;
4740
4741  MagickCLEnv
4742    clEnv;
4743
4744  MagickSizeType
4745    length;
4746
4747  PixelPacket
4748    *equalize_map;
4749
4750  register ssize_t
4751    i;
4752
4753  size_t
4754    global_work_size[2];
4755
4756  void
4757    *hostPtr,
4758    *inputPixels;
4759
4760  map=NULL;
4761  histogram=NULL;
4762  equalize_map=NULL;
4763  inputPixels = NULL;
4764  imageBuffer = NULL;
4765  histogramBuffer = NULL;
4766  equalizeMapBuffer = NULL;
4767  histogramKernel = NULL;
4768  equalizeKernel = NULL;
4769  context = NULL;
4770  queue = NULL;
4771  outputReady = MagickFalse;
4772
4773  assert(image != (Image *) NULL);
4774  assert(image->signature == MagickSignature);
4775  if (image->debug != MagickFalse)
4776    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4777
4778  /*
4779   * initialize opencl env
4780   */
4781  clEnv = GetDefaultOpenCLEnv();
4782  context = GetOpenCLContext(clEnv);
4783  queue = AcquireOpenCLCommandQueue(clEnv);
4784
4785  /*
4786    Allocate and initialize histogram arrays.
4787  */
4788  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
4789  if (histogram == (cl_uint4 *) NULL)
4790      ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
4791
4792  /* reset histogram */
4793  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
4794
4795  /* Create and initialize OpenCL buffers. */
4796  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
4797  /* assume this  will get a writable image */
4798  image_view=AcquireAuthenticCacheView(image,exception);
4799  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
4800
4801  if (inputPixels == (void *) NULL)
4802  {
4803    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
4804    goto cleanup;
4805  }
4806  /* If the host pointer is aligned to the size of CLPixelPacket,
4807     then use the host buffer directly from the GPU; otherwise,
4808     create a buffer on the GPU and copy the data over */
4809  if (ALIGNED(inputPixels,CLPixelPacket))
4810  {
4811    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4812  }
4813  else
4814  {
4815    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4816  }
4817  /* create a CL buffer from image pixel buffer */
4818  length = image->columns * image->rows;
4819  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4820  if (clStatus != CL_SUCCESS)
4821  {
4822    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4823    goto cleanup;
4824  }
4825
4826  /* If the host pointer is aligned to the size of cl_uint,
4827     then use the host buffer directly from the GPU; otherwise,
4828     create a buffer on the GPU and copy the data over */
4829  if (ALIGNED(histogram,cl_uint4))
4830  {
4831    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
4832    hostPtr = histogram;
4833  }
4834  else
4835  {
4836    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
4837    hostPtr = histogram;
4838  }
4839  /* create a CL buffer for histogram  */
4840  length = (MaxMap+1);
4841  histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
4842  if (clStatus != CL_SUCCESS)
4843  {
4844    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4845    goto cleanup;
4846  }
4847
4848  status = LaunchHistogramKernel(clEnv, queue, imageBuffer, histogramBuffer, image, channel, exception);
4849  if (status == MagickFalse)
4850    goto cleanup;
4851
4852  /* read from the kenel output */
4853  if (ALIGNED(histogram,cl_uint4))
4854  {
4855    length = (MaxMap+1);
4856    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
4857  }
4858  else
4859  {
4860    length = (MaxMap+1);
4861    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
4862  }
4863  if (clStatus != CL_SUCCESS)
4864  {
4865    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
4866    goto cleanup;
4867  }
4868
4869  /* unmap, don't block gpu to use this buffer again.  */
4870  if (ALIGNED(histogram,cl_uint4))
4871  {
4872    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
4873    if (clStatus != CL_SUCCESS)
4874    {
4875      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
4876      goto cleanup;
4877    }
4878  }
4879
4880  /* recreate input buffer later, in case image updated */
4881#ifdef RECREATEBUFFER
4882  if (imageBuffer!=NULL)
4883    clEnv->library->clReleaseMemObject(imageBuffer);
4884#endif
4885
4886  /* CPU stuff */
4887  equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
4888  if (equalize_map == (PixelPacket *) NULL)
4889    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
4890
4891  map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
4892  if (map == (FloatPixelPacket *) NULL)
4893    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
4894
4895  /*
4896    Integrate the histogram to get the equalization map.
4897  */
4898  (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
4899  for (i=0; i <= (ssize_t) MaxMap; i++)
4900  {
4901    if ((channel & SyncChannels) != 0)
4902    {
4903      intensity.red+=histogram[i].s[2];
4904      map[i]=intensity;
4905      continue;
4906    }
4907    if ((channel & RedChannel) != 0)
4908      intensity.red+=histogram[i].s[2];
4909    if ((channel & GreenChannel) != 0)
4910      intensity.green+=histogram[i].s[1];
4911    if ((channel & BlueChannel) != 0)
4912      intensity.blue+=histogram[i].s[0];
4913    if ((channel & OpacityChannel) != 0)
4914      intensity.alpha+=histogram[i].s[3];
4915    /*
4916    if (((channel & IndexChannel) != 0) &&
4917        (image->colorspace == CMYKColorspace))
4918    {
4919      intensity.index+=histogram[i].index;
4920    }
4921    */
4922    map[i]=intensity;
4923  }
4924  black=map[0];
4925  white=map[(int) MaxMap];
4926  (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
4927  for (i=0; i <= (ssize_t) MaxMap; i++)
4928  {
4929    if ((channel & SyncChannels) != 0)
4930    {
4931      if (white.red != black.red)
4932        equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
4933                (map[i].red-black.red))/(white.red-black.red)));
4934      continue;
4935    }
4936    if (((channel & RedChannel) != 0) && (white.red != black.red))
4937      equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
4938              (map[i].red-black.red))/(white.red-black.red)));
4939    if (((channel & GreenChannel) != 0) && (white.green != black.green))
4940      equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
4941              (map[i].green-black.green))/(white.green-black.green)));
4942    if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
4943      equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
4944              (map[i].blue-black.blue))/(white.blue-black.blue)));
4945    if (((channel & OpacityChannel) != 0) && (white.alpha != black.alpha))
4946      equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
4947              (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
4948    /*
4949    if ((((channel & IndexChannel) != 0) &&
4950          (image->colorspace == CMYKColorspace)) &&
4951        (white.index != black.index))
4952      equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
4953              (map[i].index-black.index))/(white.index-black.index)));
4954    */
4955  }
4956
4957  if (image->storage_class == PseudoClass)
4958  {
4959    /*
4960       Equalize colormap.
4961       */
4962    for (i=0; i < (ssize_t) image->colors; i++)
4963    {
4964      if ((channel & SyncChannels) != 0)
4965      {
4966        if (white.red != black.red)
4967        {
4968          image->colormap[i].red=equalize_map[
4969            ScaleQuantumToMap(image->colormap[i].red)].red;
4970          image->colormap[i].green=equalize_map[
4971            ScaleQuantumToMap(image->colormap[i].green)].red;
4972          image->colormap[i].blue=equalize_map[
4973            ScaleQuantumToMap(image->colormap[i].blue)].red;
4974          image->colormap[i].alpha=equalize_map[
4975            ScaleQuantumToMap(image->colormap[i].alpha)].red;
4976        }
4977        continue;
4978      }
4979      if (((channel & RedChannel) != 0) && (white.red != black.red))
4980        image->colormap[i].red=equalize_map[
4981          ScaleQuantumToMap(image->colormap[i].red)].red;
4982      if (((channel & GreenChannel) != 0) && (white.green != black.green))
4983        image->colormap[i].green=equalize_map[
4984          ScaleQuantumToMap(image->colormap[i].green)].green;
4985      if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
4986        image->colormap[i].blue=equalize_map[
4987          ScaleQuantumToMap(image->colormap[i].blue)].blue;
4988      if (((channel & OpacityChannel) != 0) &&
4989          (white.alpha != black.alpha))
4990        image->colormap[i].alpha=equalize_map[
4991          ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
4992    }
4993  }
4994
4995  /*
4996    Equalize image.
4997  */
4998
4999  /* GPU can work on this again, image and equalize map as input
5000    image:        uchar4 (CLPixelPacket)
5001    equalize_map: uchar4 (PixelPacket)
5002    black, white: float4 (FloatPixelPacket) */
5003
5004#ifdef RECREATEBUFFER
5005  /* If the host pointer is aligned to the size of CLPixelPacket,
5006     then use the host buffer directly from the GPU; otherwise,
5007     create a buffer on the GPU and copy the data over */
5008  if (ALIGNED(inputPixels,CLPixelPacket))
5009  {
5010    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
5011  }
5012  else
5013  {
5014    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
5015  }
5016  /* create a CL buffer from image pixel buffer */
5017  length = image->columns * image->rows;
5018  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
5019  if (clStatus != CL_SUCCESS)
5020  {
5021    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5022    goto cleanup;
5023  }
5024#endif
5025
5026  /* Create and initialize OpenCL buffers. */
5027  if (ALIGNED(equalize_map, PixelPacket))
5028  {
5029    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
5030    hostPtr = equalize_map;
5031  }
5032  else
5033  {
5034    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
5035    hostPtr = equalize_map;
5036  }
5037  /* create a CL buffer for eqaulize_map  */
5038  length = (MaxMap+1);
5039  equalizeMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
5040  if (clStatus != CL_SUCCESS)
5041  {
5042    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5043    goto cleanup;
5044  }
5045
5046  /* get the OpenCL kernel */
5047  equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
5048  if (equalizeKernel == NULL)
5049  {
5050    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
5051    goto cleanup;
5052  }
5053
5054  /* set the kernel arguments */
5055  i = 0;
5056  clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5057  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
5058  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
5059  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
5060  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
5061  if (clStatus != CL_SUCCESS)
5062  {
5063    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5064    goto cleanup;
5065  }
5066
5067  /* launch the kernel */
5068  global_work_size[0] = image->columns;
5069  global_work_size[1] = image->rows;
5070
5071  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
5072
5073  if (clStatus != CL_SUCCESS)
5074  {
5075    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
5076    goto cleanup;
5077  }
5078  clEnv->library->clFlush(queue);
5079
5080  /* read the data back */
5081  if (ALIGNED(inputPixels,CLPixelPacket))
5082  {
5083    length = image->columns * image->rows;
5084    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
5085  }
5086  else
5087  {
5088    length = image->columns * image->rows;
5089    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
5090  }
5091  if (clStatus != CL_SUCCESS)
5092  {
5093    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
5094    goto cleanup;
5095  }
5096
5097  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
5098
5099cleanup:
5100  OpenCLLogException(__FUNCTION__,__LINE__,exception);
5101
5102  image_view=DestroyCacheView(image_view);
5103
5104  if (imageBuffer!=NULL)
5105    clEnv->library->clReleaseMemObject(imageBuffer);
5106
5107  if (map!=NULL)
5108    map=(FloatPixelPacket *) RelinquishMagickMemory(map);
5109
5110  if (equalizeMapBuffer!=NULL)
5111    clEnv->library->clReleaseMemObject(equalizeMapBuffer);
5112  if (equalize_map!=NULL)
5113    equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
5114
5115  if (histogramBuffer!=NULL)
5116    clEnv->library->clReleaseMemObject(histogramBuffer);
5117  if (histogram!=NULL)
5118    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
5119
5120  if (histogramKernel!=NULL)
5121    RelinquishOpenCLKernel(clEnv, histogramKernel);
5122  if (equalizeKernel!=NULL)
5123    RelinquishOpenCLKernel(clEnv, equalizeKernel);
5124
5125  if (queue != NULL)
5126    RelinquishOpenCLCommandQueue(clEnv, queue);
5127
5128  return(outputReady);
5129}
5130
5131MagickExport MagickBooleanType AccelerateEqualizeImage(Image *image,
5132  const ChannelType channel,ExceptionInfo *exception)
5133{
5134  MagickBooleanType
5135    status;
5136
5137  assert(image != NULL);
5138  assert(exception != (ExceptionInfo *) NULL);
5139
5140  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
5141      (checkAccelerateCondition(image, channel) == MagickFalse) ||
5142      (checkHistogramCondition(image, channel) == MagickFalse))
5143    return(MagickFalse);
5144
5145  status=ComputeEqualizeImage(image,channel,exception);
5146  return(status);
5147}
5148
5149/*
5150%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5151%                                                                             %
5152%                                                                             %
5153%                                                                             %
5154%     C o n t r a s t S t r e t c h I m a g e  w i t h  O p e n C L           %
5155%                                                                             %
5156%                                                                             %
5157%                                                                             %
5158%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5159%
5160%  ContrastStretchImage() is a simple image enhancement technique that attempts
5161%  to improve the contrast in an image by `stretching' the range of intensity
5162%  values it contains to span a desired range of values. It differs from the
5163%  more sophisticated histogram equalization in that it can only apply a
5164%  linear scaling function to the image pixel values.  As a result the
5165%  `enhancement' is less harsh.
5166%
5167%  The format of the ContrastStretchImage method is:
5168%
5169%      MagickBooleanType ContrastStretchImage(Image *image,
5170%        const char *levels)
5171%      MagickBooleanType ContrastStretchImageChannel(Image *image,
5172%        const size_t channel,const double black_point,
5173%        const double white_point)
5174%
5175%  A description of each parameter follows:
5176%
5177%    o image: the image.
5178%
5179%    o channel: the channel.
5180%
5181%    o black_point: the black point.
5182%
5183%    o white_point: the white point.
5184%
5185%    o levels: Specify the levels where the black and white points have the
5186%      range of 0 to number-of-pixels (e.g. 1%, 10x90%, etc.).
5187%
5188*/
5189
5190MagickExport MagickBooleanType ComputeContrastStretchImageChannel(Image *image,
5191  const ChannelType channel,const double black_point,const double white_point,
5192  ExceptionInfo *exception)
5193{
5194#define ContrastStretchImageTag  "ContrastStretch/Image"
5195#define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
5196
5197  CacheView
5198    *image_view;
5199
5200  cl_command_queue
5201    queue;
5202
5203  cl_context
5204    context;
5205
5206  cl_int
5207    clStatus;
5208
5209  cl_mem_flags
5210    mem_flags;
5211
5212  cl_mem
5213    histogramBuffer,
5214    imageBuffer,
5215    stretchMapBuffer;
5216
5217  cl_kernel
5218    histogramKernel,
5219    stretchKernel;
5220
5221  cl_uint4
5222    *histogram;
5223
5224  double
5225    intensity;
5226
5227  FloatPixelPacket
5228    black,
5229    white;
5230
5231  MagickBooleanType
5232    outputReady,
5233    status;
5234
5235  MagickCLEnv
5236    clEnv;
5237
5238  MagickSizeType
5239    length;
5240
5241  PixelPacket
5242    *stretch_map;
5243
5244  register ssize_t
5245    i;
5246
5247  size_t
5248    global_work_size[2];
5249
5250  void
5251    *hostPtr,
5252    *inputPixels;
5253
5254  histogram=NULL;
5255  stretch_map=NULL;
5256  inputPixels = NULL;
5257  imageBuffer = NULL;
5258  histogramBuffer = NULL;
5259  stretchMapBuffer = NULL;
5260  histogramKernel = NULL;
5261  stretchKernel = NULL;
5262  context = NULL;
5263  queue = NULL;
5264  outputReady = MagickFalse;
5265
5266
5267  assert(image != (Image *) NULL);
5268  assert(image->signature == MagickSignature);
5269  if (image->debug != MagickFalse)
5270    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
5271
5272  //exception=(&image->exception);
5273
5274  /*
5275   * initialize opencl env
5276   */
5277  clEnv = GetDefaultOpenCLEnv();
5278  context = GetOpenCLContext(clEnv);
5279  queue = AcquireOpenCLCommandQueue(clEnv);
5280
5281  /*
5282    Allocate and initialize histogram arrays.
5283  */
5284  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
5285
5286  if (histogram == (cl_uint4 *) NULL)
5287    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
5288
5289  /* reset histogram */
5290  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
5291
5292  /*
5293  if (IsGrayImage(image,exception) != MagickFalse)
5294    (void) SetImageColorspace(image,GRAYColorspace);
5295  */
5296
5297  status=MagickTrue;
5298
5299
5300  /*
5301    Form histogram.
5302  */
5303  /* Create and initialize OpenCL buffers. */
5304  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
5305  /* assume this  will get a writable image */
5306  image_view=AcquireAuthenticCacheView(image,exception);
5307  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
5308
5309  if (inputPixels == (void *) NULL)
5310  {
5311    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
5312    goto cleanup;
5313  }
5314  /* If the host pointer is aligned to the size of CLPixelPacket,
5315     then use the host buffer directly from the GPU; otherwise,
5316     create a buffer on the GPU and copy the data over */
5317  if (ALIGNED(inputPixels,CLPixelPacket))
5318  {
5319    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
5320  }
5321  else
5322  {
5323    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
5324  }
5325  /* create a CL buffer from image pixel buffer */
5326  length = image->columns * image->rows;
5327  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
5328  if (clStatus != CL_SUCCESS)
5329  {
5330    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5331    goto cleanup;
5332  }
5333
5334  /* If the host pointer is aligned to the size of cl_uint,
5335     then use the host buffer directly from the GPU; otherwise,
5336     create a buffer on the GPU and copy the data over */
5337  if (ALIGNED(histogram,cl_uint4))
5338  {
5339    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
5340    hostPtr = histogram;
5341  }
5342  else
5343  {
5344    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
5345    hostPtr = histogram;
5346  }
5347  /* create a CL buffer for histogram  */
5348  length = (MaxMap+1);
5349  histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
5350  if (clStatus != CL_SUCCESS)
5351  {
5352    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5353    goto cleanup;
5354  }
5355
5356  status = LaunchHistogramKernel(clEnv, queue, imageBuffer, histogramBuffer, image, channel, exception);
5357  if (status == MagickFalse)
5358    goto cleanup;
5359
5360  /* read from the kenel output */
5361  if (ALIGNED(histogram,cl_uint4))
5362  {
5363    length = (MaxMap+1);
5364    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
5365  }
5366  else
5367  {
5368    length = (MaxMap+1);
5369    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
5370  }
5371  if (clStatus != CL_SUCCESS)
5372  {
5373    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
5374    goto cleanup;
5375  }
5376
5377  /* unmap, don't block gpu to use this buffer again.  */
5378  if (ALIGNED(histogram,cl_uint4))
5379  {
5380    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
5381    if (clStatus != CL_SUCCESS)
5382    {
5383      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
5384      goto cleanup;
5385    }
5386  }
5387
5388  /* recreate input buffer later, in case image updated */
5389#ifdef RECREATEBUFFER
5390  if (imageBuffer!=NULL)
5391    clEnv->library->clReleaseMemObject(imageBuffer);
5392#endif
5393
5394  /* CPU stuff */
5395  /*
5396     Find the histogram boundaries by locating the black/white levels.
5397  */
5398  black.red=0.0;
5399  white.red=MaxRange(QuantumRange);
5400  if ((channel & RedChannel) != 0)
5401  {
5402    intensity=0.0;
5403    for (i=0; i <= (ssize_t) MaxMap; i++)
5404    {
5405      intensity+=histogram[i].s[2];
5406      if (intensity > black_point)
5407        break;
5408    }
5409    black.red=(MagickRealType) i;
5410    intensity=0.0;
5411    for (i=(ssize_t) MaxMap; i != 0; i--)
5412    {
5413      intensity+=histogram[i].s[2];
5414      if (intensity > ((double) image->columns*image->rows-white_point))
5415        break;
5416    }
5417    white.red=(MagickRealType) i;
5418  }
5419  black.green=0.0;
5420  white.green=MaxRange(QuantumRange);
5421  if ((channel & GreenChannel) != 0)
5422  {
5423    intensity=0.0;
5424    for (i=0; i <= (ssize_t) MaxMap; i++)
5425    {
5426      intensity+=histogram[i].s[2];
5427      if (intensity > black_point)
5428        break;
5429    }
5430    black.green=(MagickRealType) i;
5431    intensity=0.0;
5432    for (i=(ssize_t) MaxMap; i != 0; i--)
5433    {
5434      intensity+=histogram[i].s[2];
5435      if (intensity > ((double) image->columns*image->rows-white_point))
5436        break;
5437    }
5438    white.green=(MagickRealType) i;
5439  }
5440  black.blue=0.0;
5441  white.blue=MaxRange(QuantumRange);
5442  if ((channel & BlueChannel) != 0)
5443  {
5444    intensity=0.0;
5445    for (i=0; i <= (ssize_t) MaxMap; i++)
5446    {
5447      intensity+=histogram[i].s[2];
5448      if (intensity > black_point)
5449        break;
5450    }
5451    black.blue=(MagickRealType) i;
5452    intensity=0.0;
5453    for (i=(ssize_t) MaxMap; i != 0; i--)
5454    {
5455      intensity+=histogram[i].s[2];
5456      if (intensity > ((double) image->columns*image->rows-white_point))
5457        break;
5458    }
5459    white.blue=(MagickRealType) i;
5460  }
5461  black.alpha=0.0;
5462  white.alpha=MaxRange(QuantumRange);
5463  if ((channel & OpacityChannel) != 0)
5464  {
5465    intensity=0.0;
5466    for (i=0; i <= (ssize_t) MaxMap; i++)
5467    {
5468      intensity+=histogram[i].s[2];
5469      if (intensity > black_point)
5470        break;
5471    }
5472    black.alpha=(MagickRealType) i;
5473    intensity=0.0;
5474    for (i=(ssize_t) MaxMap; i != 0; i--)
5475    {
5476      intensity+=histogram[i].s[2];
5477      if (intensity > ((double) image->columns*image->rows-white_point))
5478        break;
5479    }
5480    white.alpha=(MagickRealType) i;
5481  }
5482  /*
5483  black.index=0.0;
5484  white.index=MaxRange(QuantumRange);
5485  if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
5486  {
5487    intensity=0.0;
5488    for (i=0; i <= (ssize_t) MaxMap; i++)
5489    {
5490      intensity+=histogram[i].index;
5491      if (intensity > black_point)
5492        break;
5493    }
5494    black.index=(MagickRealType) i;
5495    intensity=0.0;
5496    for (i=(ssize_t) MaxMap; i != 0; i--)
5497    {
5498      intensity+=histogram[i].index;
5499      if (intensity > ((double) image->columns*image->rows-white_point))
5500        break;
5501    }
5502    white.index=(MagickRealType) i;
5503  }
5504  */
5505
5506
5507  stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
5508    sizeof(*stretch_map));
5509
5510  if (stretch_map == (PixelPacket *) NULL)
5511    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
5512      image->filename);
5513
5514  /*
5515    Stretch the histogram to create the stretched image mapping.
5516  */
5517  (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
5518  for (i=0; i <= (ssize_t) MaxMap; i++)
5519  {
5520    if ((channel & RedChannel) != 0)
5521    {
5522      if (i < (ssize_t) black.red)
5523        stretch_map[i].red=(Quantum) 0;
5524      else
5525        if (i > (ssize_t) white.red)
5526          stretch_map[i].red=QuantumRange;
5527        else
5528          if (black.red != white.red)
5529            stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
5530                  (i-black.red)/(white.red-black.red)));
5531    }
5532    if ((channel & GreenChannel) != 0)
5533    {
5534      if (i < (ssize_t) black.green)
5535        stretch_map[i].green=0;
5536      else
5537        if (i > (ssize_t) white.green)
5538          stretch_map[i].green=QuantumRange;
5539        else
5540          if (black.green != white.green)
5541            stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
5542                  (i-black.green)/(white.green-black.green)));
5543    }
5544    if ((channel & BlueChannel) != 0)
5545    {
5546      if (i < (ssize_t) black.blue)
5547        stretch_map[i].blue=0;
5548      else
5549        if (i > (ssize_t) white.blue)
5550          stretch_map[i].blue= QuantumRange;
5551        else
5552          if (black.blue != white.blue)
5553            stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
5554                  (i-black.blue)/(white.blue-black.blue)));
5555    }
5556    if ((channel & OpacityChannel) != 0)
5557    {
5558      if (i < (ssize_t) black.alpha)
5559        stretch_map[i].alpha=0;
5560      else
5561        if (i > (ssize_t) white.alpha)
5562          stretch_map[i].alpha=QuantumRange;
5563        else
5564          if (black.alpha != white.alpha)
5565            stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
5566                  (i-black.alpha)/(white.alpha-black.alpha)));
5567    }
5568    /*
5569    if (((channel & IndexChannel) != 0) &&
5570        (image->colorspace == CMYKColorspace))
5571    {
5572      if (i < (ssize_t) black.index)
5573        stretch_map[i].index=0;
5574      else
5575        if (i > (ssize_t) white.index)
5576          stretch_map[i].index=QuantumRange;
5577        else
5578          if (black.index != white.index)
5579            stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
5580                  (i-black.index)/(white.index-black.index)));
5581    }
5582    */
5583  }
5584
5585  /*
5586    Stretch the image.
5587  */
5588  if (((channel & OpacityChannel) != 0) || (((channel & IndexChannel) != 0) &&
5589      (image->colorspace == CMYKColorspace)))
5590    image->storage_class=DirectClass;
5591  if (image->storage_class == PseudoClass)
5592  {
5593    /*
5594       Stretch colormap.
5595       */
5596    for (i=0; i < (ssize_t) image->colors; i++)
5597    {
5598      if ((channel & RedChannel) != 0)
5599      {
5600        if (black.red != white.red)
5601          image->colormap[i].red=stretch_map[
5602            ScaleQuantumToMap(image->colormap[i].red)].red;
5603      }
5604      if ((channel & GreenChannel) != 0)
5605      {
5606        if (black.green != white.green)
5607          image->colormap[i].green=stretch_map[
5608            ScaleQuantumToMap(image->colormap[i].green)].green;
5609      }
5610      if ((channel & BlueChannel) != 0)
5611      {
5612        if (black.blue != white.blue)
5613          image->colormap[i].blue=stretch_map[
5614            ScaleQuantumToMap(image->colormap[i].blue)].blue;
5615      }
5616      if ((channel & OpacityChannel) != 0)
5617      {
5618        if (black.alpha != white.alpha)
5619          image->colormap[i].alpha=stretch_map[
5620            ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
5621      }
5622    }
5623  }
5624
5625  /*
5626    Stretch image.
5627  */
5628
5629
5630  /* GPU can work on this again, image and equalize map as input
5631    image:        uchar4 (CLPixelPacket)
5632    stretch_map:  uchar4 (PixelPacket)
5633    black, white: float4 (FloatPixelPacket) */
5634
5635#ifdef RECREATEBUFFER
5636  /* If the host pointer is aligned to the size of CLPixelPacket,
5637     then use the host buffer directly from the GPU; otherwise,
5638     create a buffer on the GPU and copy the data over */
5639  if (ALIGNED(inputPixels,CLPixelPacket))
5640  {
5641    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
5642  }
5643  else
5644  {
5645    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
5646  }
5647  /* create a CL buffer from image pixel buffer */
5648  length = image->columns * image->rows;
5649  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
5650  if (clStatus != CL_SUCCESS)
5651  {
5652    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5653    goto cleanup;
5654  }
5655#endif
5656
5657  /* Create and initialize OpenCL buffers. */
5658  if (ALIGNED(stretch_map, PixelPacket))
5659  {
5660    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
5661    hostPtr = stretch_map;
5662  }
5663  else
5664  {
5665    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
5666    hostPtr = stretch_map;
5667  }
5668  /* create a CL buffer for stretch_map  */
5669  length = (MaxMap+1);
5670  stretchMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
5671  if (clStatus != CL_SUCCESS)
5672  {
5673    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5674    goto cleanup;
5675  }
5676
5677  /* get the OpenCL kernel */
5678  stretchKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Stretch");
5679  if (stretchKernel == NULL)
5680  {
5681    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
5682    goto cleanup;
5683  }
5684
5685  /* set the kernel arguments */
5686  i = 0;
5687  clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5688  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&channel);
5689  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
5690  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
5691  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
5692  if (clStatus != CL_SUCCESS)
5693  {
5694    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5695    goto cleanup;
5696  }
5697
5698  /* launch the kernel */
5699  global_work_size[0] = image->columns;
5700  global_work_size[1] = image->rows;
5701
5702  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
5703
5704  if (clStatus != CL_SUCCESS)
5705  {
5706    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
5707    goto cleanup;
5708  }
5709  clEnv->library->clFlush(queue);
5710
5711  /* read the data back */
5712  if (ALIGNED(inputPixels,CLPixelPacket))
5713  {
5714    length = image->columns * image->rows;
5715    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
5716  }
5717  else
5718  {
5719    length = image->columns * image->rows;
5720    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
5721  }
5722  if (clStatus != CL_SUCCESS)
5723  {
5724    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
5725    goto cleanup;
5726  }
5727
5728  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
5729
5730cleanup:
5731  OpenCLLogException(__FUNCTION__,__LINE__,exception);
5732
5733  image_view=DestroyCacheView(image_view);
5734
5735  if (imageBuffer!=NULL)
5736    clEnv->library->clReleaseMemObject(imageBuffer);
5737
5738  if (stretchMapBuffer!=NULL)
5739    clEnv->library->clReleaseMemObject(stretchMapBuffer);
5740  if (stretch_map!=NULL)
5741    stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
5742
5743
5744  if (histogramBuffer!=NULL)
5745    clEnv->library->clReleaseMemObject(histogramBuffer);
5746  if (histogram!=NULL)
5747    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
5748
5749
5750  if (histogramKernel!=NULL)
5751    RelinquishOpenCLKernel(clEnv, histogramKernel);
5752  if (stretchKernel!=NULL)
5753    RelinquishOpenCLKernel(clEnv, stretchKernel);
5754
5755  if (queue != NULL)
5756    RelinquishOpenCLCommandQueue(clEnv, queue);
5757
5758  return(outputReady);
5759}
5760
5761MagickExport MagickBooleanType AccelerateContrastStretchImageChannel(
5762  Image *image,const ChannelType channel,const double black_point,
5763  const double white_point,ExceptionInfo *exception)
5764{
5765  MagickBooleanType
5766    status;
5767
5768  assert(image != NULL);
5769  assert(exception != (ExceptionInfo *) NULL);
5770
5771  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
5772      (checkAccelerateCondition(image, channel) == MagickFalse) ||
5773      (checkHistogramCondition(image, channel) == MagickFalse))
5774    return(MagickFalse);
5775
5776  status=ComputeContrastStretchImageChannel(image,channel, black_point, white_point, exception);
5777  return(status);
5778}
5779
5780/*
5781%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5782%                                                                             %
5783%                                                                             %
5784%                                                                             %
5785%     D e s p e c k l e I m a g e  w i t h  O p e n C L                       %
5786%                                                                             %
5787%                                                                             %
5788%                                                                             %
5789%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5790%
%  DespeckleImage() reduces the speckle noise in an image while preserving the
5792%  edges of the original image.  A speckle removing filter uses a complementary
5793%  hulling technique (raising pixels that are darker than their surrounding
5794%  neighbors, then complementarily lowering pixels that are brighter than their
5795%  surrounding neighbors) to reduce the speckle index of that image (reference
5796%  Crimmins speckle removal).
5797%
5798%  The format of the DespeckleImage method is:
5799%
5800%      Image *DespeckleImage(const Image *image,ExceptionInfo *exception)
5801%
5802%  A description of each parameter follows:
5803%
5804%    o image: the image.
5805%
5806%    o exception: return any errors or warnings in this structure.
5807%
5808*/
5809
5810static Image *ComputeDespeckleImage(const Image *image,
5811  ExceptionInfo*exception)
5812{
5813  static const int
5814    X[4] = {0, 1, 1,-1},
5815    Y[4] = {1, 0, 1, 1};
5816
5817  CacheView
5818    *filteredImage_view,
5819    *image_view;
5820
5821  cl_command_queue
5822    queue;
5823
5824  cl_context
5825    context;
5826
5827  cl_int
5828    clStatus;
5829
5830  cl_kernel
5831    hullPass1,
5832    hullPass2;
5833
5834  cl_mem_flags
5835    mem_flags;
5836
5837  cl_mem
5838    filteredImageBuffer,
5839    imageBuffer,
5840    tempImageBuffer[2];
5841
5842  const void
5843    *inputPixels;
5844
5845  Image
5846    *filteredImage;
5847
5848  int
5849    k,
5850    matte;
5851
5852  MagickBooleanType
5853    outputReady;
5854
5855  MagickCLEnv
5856    clEnv;
5857
5858  MagickSizeType
5859    length;
5860
5861  size_t
5862    global_work_size[2];
5863
5864  unsigned int
5865    imageHeight,
5866    imageWidth;
5867
5868  void
5869    *filteredPixels,
5870    *hostPtr;
5871
5872  outputReady = MagickFalse;
5873  clEnv = NULL;
5874  inputPixels = NULL;
5875  filteredImage = NULL;
5876  filteredImage_view = NULL;
5877  filteredPixels = NULL;
5878  context = NULL;
5879  imageBuffer = NULL;
5880  filteredImageBuffer = NULL;
5881  hullPass1 = NULL;
5882  hullPass2 = NULL;
5883  queue = NULL;
5884  tempImageBuffer[0] = tempImageBuffer[1] = NULL;
5885  clEnv = GetDefaultOpenCLEnv();
5886  context = GetOpenCLContext(clEnv);
5887  queue = AcquireOpenCLCommandQueue(clEnv);
5888
5889  image_view=AcquireVirtualCacheView(image,exception);
5890  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
5891  if (inputPixels == (void *) NULL)
5892  {
5893    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
5894    goto cleanup;
5895  }
5896
5897  if (ALIGNED(inputPixels,CLPixelPacket))
5898  {
5899    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
5900  }
5901  else
5902  {
5903    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
5904  }
5905  /* create a CL buffer from image pixel buffer */
5906  length = image->columns * image->rows;
5907  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
5908  if (clStatus != CL_SUCCESS)
5909  {
5910    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5911    goto cleanup;
5912  }
5913
5914  mem_flags = CL_MEM_READ_WRITE;
5915  length = image->columns * image->rows;
5916  for (k = 0; k < 2; k++)
5917  {
5918    tempImageBuffer[k] = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
5919    if (clStatus != CL_SUCCESS)
5920    {
5921      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5922      goto cleanup;
5923    }
5924  }
5925
5926  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
5927  assert(filteredImage != NULL);
5928  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
5929  {
5930    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
5931    goto cleanup;
5932  }
5933  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
5934  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
5935  if (filteredPixels == (void *) NULL)
5936  {
5937    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
5938    goto cleanup;
5939  }
5940
5941  if (ALIGNED(filteredPixels,CLPixelPacket))
5942  {
5943    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
5944    hostPtr = filteredPixels;
5945  }
5946  else
5947  {
5948    mem_flags = CL_MEM_WRITE_ONLY;
5949    hostPtr = NULL;
5950  }
5951  /* create a CL buffer from image pixel buffer */
5952  length = image->columns * image->rows;
5953  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
5954  if (clStatus != CL_SUCCESS)
5955  {
5956    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5957    goto cleanup;
5958  }
5959
5960  hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
5961  hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
5962
5963  clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
5964  clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
5965  imageWidth = image->columns;
5966  clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
5967  imageHeight = image->rows;
5968  clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
5969  matte = (image->alpha_trait==BlendPixelTrait)?0:1;
5970  clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
5971  if (clStatus != CL_SUCCESS)
5972  {
5973    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5974    goto cleanup;
5975  }
5976
5977  clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
5978  clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
5979  imageWidth = image->columns;
5980  clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
5981  imageHeight = image->rows;
5982  clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
5983  matte = (image->alpha_trait==BlendPixelTrait)?0:1;
5984  clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
5985  if (clStatus != CL_SUCCESS)
5986  {
5987    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5988    goto cleanup;
5989  }
5990
5991
5992  global_work_size[0] = image->columns;
5993  global_work_size[1] = image->rows;
5994
5995
5996  for (k = 0; k < 4; k++)
5997  {
5998    cl_int2 offset;
5999    int polarity;
6000
6001
6002    offset.s[0] = X[k];
6003    offset.s[1] = Y[k];
6004    polarity = 1;
6005    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
6006    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
6007    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
6008    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
6009    if (clStatus != CL_SUCCESS)
6010    {
6011      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6012      goto cleanup;
6013    }
6014    /* launch the kernel */
6015    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6016    if (clStatus != CL_SUCCESS)
6017    {
6018      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6019      goto cleanup;
6020    }
6021    /* launch the kernel */
6022    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6023    if (clStatus != CL_SUCCESS)
6024    {
6025      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6026      goto cleanup;
6027    }
6028
6029
6030    if (k == 0)
6031      clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
6032    offset.s[0] = -X[k];
6033    offset.s[1] = -Y[k];
6034    polarity = 1;
6035    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
6036    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
6037    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
6038    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
6039    if (clStatus != CL_SUCCESS)
6040    {
6041      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6042      goto cleanup;
6043    }
6044    /* launch the kernel */
6045    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6046    if (clStatus != CL_SUCCESS)
6047    {
6048      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6049      goto cleanup;
6050    }
6051    /* launch the kernel */
6052    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6053    if (clStatus != CL_SUCCESS)
6054    {
6055      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6056      goto cleanup;
6057    }
6058
6059    offset.s[0] = -X[k];
6060    offset.s[1] = -Y[k];
6061    polarity = -1;
6062    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
6063    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
6064    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
6065    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
6066    if (clStatus != CL_SUCCESS)
6067    {
6068      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6069      goto cleanup;
6070    }
6071    /* launch the kernel */
6072    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6073    if (clStatus != CL_SUCCESS)
6074    {
6075      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6076      goto cleanup;
6077    }
6078    /* launch the kernel */
6079    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6080    if (clStatus != CL_SUCCESS)
6081    {
6082      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6083      goto cleanup;
6084    }
6085
6086    offset.s[0] = X[k];
6087    offset.s[1] = Y[k];
6088    polarity = -1;
6089    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
6090    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
6091    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
6092    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
6093
6094    if (k == 3)
6095      clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
6096
6097    if (clStatus != CL_SUCCESS)
6098    {
6099      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6100      goto cleanup;
6101    }
6102    /* launch the kernel */
6103    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6104    if (clStatus != CL_SUCCESS)
6105    {
6106      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6107      goto cleanup;
6108    }
6109    /* launch the kernel */
6110    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, NULL);
6111    if (clStatus != CL_SUCCESS)
6112    {
6113      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6114      goto cleanup;
6115    }
6116  }
6117
6118  if (ALIGNED(filteredPixels,CLPixelPacket))
6119  {
6120    length = image->columns * image->rows;
6121    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
6122  }
6123  else
6124  {
6125    length = image->columns * image->rows;
6126    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
6127  }
6128  if (clStatus != CL_SUCCESS)
6129  {
6130    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
6131    goto cleanup;
6132  }
6133
6134  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
6135
6136cleanup:
6137  OpenCLLogException(__FUNCTION__,__LINE__,exception);
6138
6139  image_view=DestroyCacheView(image_view);
6140  if (filteredImage_view != NULL)
6141    filteredImage_view=DestroyCacheView(filteredImage_view);
6142
6143  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
6144  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
6145  for (k = 0; k < 2; k++)
6146  {
6147    if (tempImageBuffer[k]!=NULL)	      clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
6148  }
6149  if (filteredImageBuffer!=NULL)	      clEnv->library->clReleaseMemObject(filteredImageBuffer);
6150  if (hullPass1!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass1);
6151  if (hullPass2!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass2);
6152  if (outputReady == MagickFalse && filteredImage != NULL)
6153    filteredImage=DestroyImage(filteredImage);
6154  return(filteredImage);
6155}
6156
6157MagickExport Image *AccelerateDespeckleImage(const Image* image,
6158  ExceptionInfo* exception)
6159{
6160  Image
6161    *filteredImage;
6162
6163  assert(image != NULL);
6164  assert(exception != (ExceptionInfo *) NULL);
6165
6166  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
6167      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
6168    return NULL;
6169
6170  filteredImage=ComputeDespeckleImage(image,exception);
6171  return(filteredImage);
6172}
6173
6174static Image *ComputeAddNoiseImage(const Image *image,
6175  const ChannelType channel,const NoiseType noise_type,
6176  ExceptionInfo *exception)
6177{
6178  CacheView
6179    *filteredImage_view,
6180    *image_view;
6181
6182  cl_command_queue
6183    queue;
6184
6185  cl_context
6186    context;
6187
6188  cl_int
6189    inputPixelCount,
6190    pixelsPerWorkitem,
6191    clStatus;
6192
6193  cl_uint
6194    seed0,
6195    seed1;
6196
6197  cl_kernel
6198    addNoiseKernel;
6199
6200  cl_mem_flags
6201    mem_flags;
6202
6203  cl_mem
6204    filteredImageBuffer,
6205    imageBuffer;
6206
6207  const char
6208    *option;
6209
6210  const void
6211    *inputPixels;
6212
6213  float
6214    attenuate;
6215
6216  MagickBooleanType
6217    outputReady;
6218
6219  MagickCLEnv
6220    clEnv;
6221
6222  MagickSizeType
6223    length;
6224
6225  Image
6226    *filteredImage;
6227
6228  RandomInfo
6229    **restrict random_info;
6230
6231  size_t
6232    global_work_size[1],
6233    local_work_size[1];
6234
6235  unsigned int
6236    k,
6237    numRandomNumberPerPixel;
6238
6239#if defined(MAGICKCORE_OPENMP_SUPPORT)
6240  unsigned long
6241    key;
6242#endif
6243
6244  void
6245    *filteredPixels,
6246    *hostPtr;
6247
6248  outputReady = MagickFalse;
6249  clEnv = NULL;
6250  inputPixels = NULL;
6251  filteredImage = NULL;
6252  filteredImage_view = NULL;
6253  filteredPixels = NULL;
6254  context = NULL;
6255  imageBuffer = NULL;
6256  filteredImageBuffer = NULL;
6257  queue = NULL;
6258  addNoiseKernel = NULL;
6259
6260  clEnv = GetDefaultOpenCLEnv();
6261  context = GetOpenCLContext(clEnv);
6262  queue = AcquireOpenCLCommandQueue(clEnv);
6263
6264  image_view=AcquireVirtualCacheView(image,exception);
6265  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
6266  if (inputPixels == (void *) NULL)
6267  {
6268    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
6269    goto cleanup;
6270  }
6271
6272  if (ALIGNED(inputPixels,CLPixelPacket))
6273  {
6274    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
6275  }
6276  else
6277  {
6278    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
6279  }
6280  /* create a CL buffer from image pixel buffer */
6281  length = image->columns * image->rows;
6282  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
6283  if (clStatus != CL_SUCCESS)
6284  {
6285    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6286    goto cleanup;
6287  }
6288
6289
6290  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
6291  assert(filteredImage != NULL);
6292  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
6293  {
6294    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
6295    goto cleanup;
6296  }
6297  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
6298  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
6299  if (filteredPixels == (void *) NULL)
6300  {
6301    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
6302    goto cleanup;
6303  }
6304
6305  if (ALIGNED(filteredPixels,CLPixelPacket))
6306  {
6307    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
6308    hostPtr = filteredPixels;
6309  }
6310  else
6311  {
6312    mem_flags = CL_MEM_WRITE_ONLY;
6313    hostPtr = NULL;
6314  }
6315  /* create a CL buffer from image pixel buffer */
6316  length = image->columns * image->rows;
6317  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
6318  if (clStatus != CL_SUCCESS)
6319  {
6320    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6321    goto cleanup;
6322  }
6323
6324  /* find out how many random numbers needed by pixel */
6325  numRandomNumberPerPixel = 0;
6326  {
6327    unsigned int numRandPerChannel = 0;
6328    switch (noise_type)
6329    {
6330    case UniformNoise:
6331    case ImpulseNoise:
6332    case LaplacianNoise:
6333    case RandomNoise:
6334    default:
6335      numRandPerChannel = 1;
6336      break;
6337    case GaussianNoise:
6338    case MultiplicativeGaussianNoise:
6339    case PoissonNoise:
6340      numRandPerChannel = 2;
6341      break;
6342    };
6343
6344    if ((channel & RedChannel) != 0)
6345      numRandomNumberPerPixel+=numRandPerChannel;
6346    if ((channel & GreenChannel) != 0)
6347      numRandomNumberPerPixel+=numRandPerChannel;
6348    if ((channel & BlueChannel) != 0)
6349      numRandomNumberPerPixel+=numRandPerChannel;
6350    if ((channel & OpacityChannel) != 0)
6351      numRandomNumberPerPixel+=numRandPerChannel;
6352  }
6353
6354  /* set up the random number generators */
6355  attenuate=1.0;
6356  option=GetImageArtifact(image,"attenuate");
6357  if (option != (char *) NULL)
6358    attenuate=StringToDouble(option,(char **) NULL);
6359  random_info=AcquireRandomInfoThreadSet();
6360#if defined(MAGICKCORE_OPENMP_SUPPORT)
6361  key=GetRandomSecretKey(random_info[0]);
6362  (void) key;
6363#endif
6364
6365  addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"GenerateNoiseImage");
6366
6367  {
6368    cl_uint computeUnitCount;
6369    cl_uint workItemCount;
6370    clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &computeUnitCount, NULL);
6371    workItemCount = computeUnitCount * 2 * 256;			// 256 work items per group, 2 groups per CU
6372    inputPixelCount = image->columns * image->rows;
6373    pixelsPerWorkitem = (inputPixelCount + workItemCount - 1) / workItemCount;
6374    pixelsPerWorkitem = ((pixelsPerWorkitem + 3) / 4) * 4;
6375
6376    local_work_size[0] = 256;
6377    global_work_size[0] = workItemCount;
6378  }
6379  {
6380    RandomInfo* randomInfo = AcquireRandomInfo();
6381	const unsigned long* s = GetRandomInfoSeed(randomInfo);
6382	seed0 = s[0];
6383	GetPseudoRandomValue(randomInfo);
6384	seed1 = s[0];
6385	randomInfo = DestroyRandomInfo(randomInfo);
6386  }
6387
6388  k = 0;
6389  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&imageBuffer);
6390  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
6391  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&inputPixelCount);
6392  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
6393  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
6394  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
6395  attenuate=1.0f;
6396  option=GetImageArtifact(image,"attenuate");
6397  if (option != (char *) NULL)
6398    attenuate=(float)StringToDouble(option,(char **) NULL);
6399  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
6400  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&seed0);
6401  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&seed1);
6402  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
6403
6404  clEnv->library->clEnqueueNDRangeKernel(queue,addNoiseKernel,1,NULL,global_work_size,NULL,0,NULL,NULL);
6405
6406  if (ALIGNED(filteredPixels,CLPixelPacket))
6407  {
6408    length = image->columns * image->rows;
6409    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
6410  }
6411  else
6412  {
6413    length = image->columns * image->rows;
6414    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
6415  }
6416  if (clStatus != CL_SUCCESS)
6417  {
6418    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
6419    goto cleanup;
6420  }
6421
6422  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
6423
6424cleanup:
6425  OpenCLLogException(__FUNCTION__,__LINE__,exception);
6426
6427  image_view=DestroyCacheView(image_view);
6428  if (filteredImage_view != NULL)
6429    filteredImage_view=DestroyCacheView(filteredImage_view);
6430
6431  if (queue!=NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
6432  if (addNoiseKernel!=NULL)         RelinquishOpenCLKernel(clEnv, addNoiseKernel);
6433  if (imageBuffer!=NULL)		    clEnv->library->clReleaseMemObject(imageBuffer);
6434  if (filteredImageBuffer!=NULL)	  clEnv->library->clReleaseMemObject(filteredImageBuffer);
6435  if (outputReady == MagickFalse && filteredImage != NULL)
6436    filteredImage=DestroyImage(filteredImage);
6437
6438  return(filteredImage);
6439}
6440
6441
6442MagickExport Image *AccelerateAddNoiseImage(const Image *image,
6443  const ChannelType channel,const NoiseType noise_type,
6444  ExceptionInfo *exception)
6445{
6446  Image
6447    *filteredImage;
6448
6449  assert(image != NULL);
6450  assert(exception != (ExceptionInfo *) NULL);
6451
6452  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
6453      (checkAccelerateCondition(image, channel) == MagickFalse))
6454    return NULL;
6455
6456  filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception);
6457
6458  return(filteredImage);
6459}
6460
6461static MagickBooleanType LaunchRandomImageKernel(MagickCLEnv clEnv,
6462  cl_command_queue queue,cl_mem imageBuffer,const unsigned int imageColumns,
6463  const unsigned int imageRows,cl_mem seedBuffer,
6464  const unsigned int numGenerators,ExceptionInfo *exception)
6465{
6466  int
6467    k;
6468
6469  cl_int
6470    clStatus;
6471
6472  cl_kernel
6473    randomImageKernel;
6474
6475  MagickBooleanType
6476    status;
6477
6478  size_t
6479    global_work_size,
6480    local_work_size;
6481
6482  status = MagickFalse;
6483  randomImageKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RandomImage");
6484
6485  k = 0;
6486  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&imageBuffer);
6487  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageColumns);
6488  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageRows);
6489  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&seedBuffer);
6490  {
6491    const float randNormNumerator = 1.0f;
6492    const unsigned int randNormDenominator = (unsigned int)(~0UL);
6493    clEnv->library->clSetKernelArg(randomImageKernel,k++,
6494          sizeof(float),(void*)&randNormNumerator);
6495    clEnv->library->clSetKernelArg(randomImageKernel,k++,
6496          sizeof(cl_uint),(void*)&randNormDenominator);
6497  }
6498
6499
6500  global_work_size = numGenerators;
6501  local_work_size = 64;
6502
6503  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue,randomImageKernel,1,NULL,&global_work_size,
6504                                    &local_work_size,0,NULL,NULL);
6505
6506  if (clStatus != CL_SUCCESS)
6507  {
6508    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning,
6509                                      "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6510    goto cleanup;
6511  }
6512  status = MagickTrue;
6513
6514cleanup:
6515  if (randomImageKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomImageKernel);
6516  return(status);
6517}
6518
6519static MagickBooleanType ComputeRandomImage(Image* image,
6520  ExceptionInfo* exception)
6521{
6522  CacheView
6523    *image_view;
6524
6525  cl_command_queue
6526    queue;
6527
6528  cl_context
6529    context;
6530
6531  cl_int
6532    clStatus;
6533
6534  /* Don't release this buffer in this function !!! */
6535  cl_mem
6536    randomNumberSeedsBuffer;
6537
6538  cl_mem_flags
6539    mem_flags;
6540
6541  cl_mem
6542   imageBuffer;
6543
6544  MagickBooleanType
6545    outputReady,
6546    status;
6547
6548  MagickCLEnv
6549    clEnv;
6550
6551  MagickSizeType
6552    length;
6553
6554  void
6555    *inputPixels;
6556
6557  status = MagickFalse;
6558  outputReady = MagickFalse;
6559  inputPixels = NULL;
6560  context = NULL;
6561  imageBuffer = NULL;
6562  queue = NULL;
6563
6564  clEnv = GetDefaultOpenCLEnv();
6565  context = GetOpenCLContext(clEnv);
6566
6567  /* Create and initialize OpenCL buffers. */
6568  image_view=AcquireAuthenticCacheView(image,exception);
6569  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
6570  if (inputPixels == (void *) NULL)
6571  {
6572    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
6573    goto cleanup;
6574  }
6575
6576  /* If the host pointer is aligned to the size of CLPixelPacket,
6577     then use the host buffer directly from the GPU; otherwise,
6578     create a buffer on the GPU and copy the data over */
6579  if (ALIGNED(inputPixels,CLPixelPacket))
6580  {
6581    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
6582  }
6583  else
6584  {
6585    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
6586  }
6587  /* create a CL buffer from image pixel buffer */
6588  length = image->columns * image->rows;
6589  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
6590  if (clStatus != CL_SUCCESS)
6591  {
6592    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6593    goto cleanup;
6594  }
6595
6596  queue = AcquireOpenCLCommandQueue(clEnv);
6597
6598  randomNumberSeedsBuffer = GetAndLockRandSeedBuffer(clEnv);
6599  if (randomNumberSeedsBuffer==NULL)
6600  {
6601    (void) OpenCLThrowMagickException(exception, GetMagickModule(),
6602           ResourceLimitWarning, "Failed to get GPU random number generators.",
6603           "'%s'", ".");
6604    goto cleanup;
6605  }
6606
6607  status = LaunchRandomImageKernel(clEnv,queue,
6608                                   imageBuffer,
6609                                   image->columns,
6610                                   image->rows,
6611                                   randomNumberSeedsBuffer,
6612                                   GetNumRandGenerators(clEnv),
6613                                   exception);
6614  if (status==MagickFalse)
6615  {
6616    goto cleanup;
6617  }
6618
6619  if (ALIGNED(inputPixels,CLPixelPacket))
6620  {
6621    length = image->columns * image->rows;
6622    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
6623  }
6624  else
6625  {
6626    length = image->columns * image->rows;
6627    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
6628  }
6629  if (clStatus != CL_SUCCESS)
6630  {
6631    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
6632    goto cleanup;
6633  }
6634  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
6635
6636cleanup:
6637  OpenCLLogException(__FUNCTION__,__LINE__,exception);
6638
6639  image_view=DestroyCacheView(image_view);
6640
6641  UnlockRandSeedBuffer(clEnv);
6642  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
6643  if (queue != NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
6644  return outputReady;
6645}
6646
6647MagickExport MagickBooleanType AccelerateRandomImage(Image *image,
6648  ExceptionInfo* exception)
6649{
6650  MagickBooleanType
6651    status;
6652
6653  assert(image != NULL);
6654  assert(exception != (ExceptionInfo *) NULL);
6655
6656  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
6657      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
6658    return(MagickFalse);
6659
6660  status=ComputeRandomImage(image,exception);
6661  return(status);
6662}
6663
6664static Image* ComputeMotionBlurImage(const Image *image,
6665  const ChannelType channel,const double *kernel,const size_t width,
6666  const OffsetInfo *offset,ExceptionInfo *exception)
6667{
6668  CacheView
6669    *filteredImage_view,
6670    *image_view;
6671
6672  cl_command_queue
6673    queue;
6674
6675  cl_context
6676    context;
6677
6678  cl_float4
6679    biasPixel;
6680
6681  cl_int
6682    clStatus;
6683
6684  cl_kernel
6685    motionBlurKernel;
6686
6687  cl_mem
6688    filteredImageBuffer,
6689    imageBuffer,
6690    imageKernelBuffer,
6691    offsetBuffer;
6692
6693  cl_mem_flags
6694    mem_flags;
6695
6696  const void
6697    *inputPixels;
6698
6699  float
6700    *kernelBufferPtr;
6701
6702  Image
6703    *filteredImage;
6704
6705  int
6706    *offsetBufferPtr;
6707
6708  MagickBooleanType
6709    outputReady;
6710
6711  MagickCLEnv
6712   clEnv;
6713
6714  PixelInfo
6715    bias;
6716
6717  MagickSizeType
6718    length;
6719
6720  size_t
6721    global_work_size[2],
6722    local_work_size[2];
6723
6724  unsigned int
6725    i,
6726    imageHeight,
6727    imageWidth,
6728    matte;
6729
6730  void
6731    *filteredPixels,
6732    *hostPtr;
6733
6734  outputReady = MagickFalse;
6735  context = NULL;
6736  filteredImage = NULL;
6737  filteredImage_view = NULL;
6738  imageBuffer = NULL;
6739  filteredImageBuffer = NULL;
6740  imageKernelBuffer = NULL;
6741  motionBlurKernel = NULL;
6742  queue = NULL;
6743
6744  clEnv = GetDefaultOpenCLEnv();
6745  context = GetOpenCLContext(clEnv);
6746
6747  /* Create and initialize OpenCL buffers. */
6748
6749  image_view=AcquireVirtualCacheView(image,exception);
6750  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
6751  if (inputPixels == (const void *) NULL)
6752  {
6753    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
6754      "UnableToReadPixelCache.","`%s'",image->filename);
6755    goto cleanup;
6756  }
6757
6758  // If the host pointer is aligned to the size of CLPixelPacket,
6759  // then use the host buffer directly from the GPU; otherwise,
6760  // create a buffer on the GPU and copy the data over
6761  if (ALIGNED(inputPixels,CLPixelPacket))
6762  {
6763    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
6764  }
6765  else
6766  {
6767    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
6768  }
6769  // create a CL buffer from image pixel buffer
6770  length = image->columns * image->rows;
6771  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
6772    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
6773  if (clStatus != CL_SUCCESS)
6774  {
6775    (void) ThrowMagickException(exception, GetMagickModule(),
6776      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
6777    goto cleanup;
6778  }
6779
6780
6781  filteredImage = CloneImage(image,image->columns,image->rows,
6782    MagickTrue,exception);
6783  assert(filteredImage != NULL);
6784  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
6785  {
6786    (void) ThrowMagickException(exception, GetMagickModule(),
6787      ResourceLimitError, "CloneImage failed.", "'%s'", ".");
6788    goto cleanup;
6789  }
6790  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
6791  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
6792  if (filteredPixels == (void *) NULL)
6793  {
6794    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
6795      "UnableToReadPixelCache.","`%s'",filteredImage->filename);
6796    goto cleanup;
6797  }
6798
6799  if (ALIGNED(filteredPixels,CLPixelPacket))
6800  {
6801    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
6802    hostPtr = filteredPixels;
6803  }
6804  else
6805  {
6806    mem_flags = CL_MEM_WRITE_ONLY;
6807    hostPtr = NULL;
6808  }
6809  // create a CL buffer from image pixel buffer
6810  length = image->columns * image->rows;
6811  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
6812    length * sizeof(CLPixelPacket), hostPtr, &clStatus);
6813  if (clStatus != CL_SUCCESS)
6814  {
6815    (void) ThrowMagickException(exception, GetMagickModule(),
6816      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
6817    goto cleanup;
6818  }
6819
6820
6821  imageKernelBuffer = clEnv->library->clCreateBuffer(context,
6822    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
6823    &clStatus);
6824  if (clStatus != CL_SUCCESS)
6825  {
6826    (void) ThrowMagickException(exception, GetMagickModule(),
6827      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
6828    goto cleanup;
6829  }
6830
6831  queue = AcquireOpenCLCommandQueue(clEnv);
6832  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
6833    CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
6834  if (clStatus != CL_SUCCESS)
6835  {
6836    (void) ThrowMagickException(exception, GetMagickModule(),
6837      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
6838    goto cleanup;
6839  }
6840  for (i = 0; i < width; i++)
6841  {
6842    kernelBufferPtr[i] = (float) kernel[i];
6843  }
6844  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
6845    0, NULL, NULL);
6846 if (clStatus != CL_SUCCESS)
6847  {
6848    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
6849      "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
6850    goto cleanup;
6851  }
6852
6853  offsetBuffer = clEnv->library->clCreateBuffer(context,
6854    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
6855    &clStatus);
6856  if (clStatus != CL_SUCCESS)
6857  {
6858    (void) ThrowMagickException(exception, GetMagickModule(),
6859      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
6860    goto cleanup;
6861  }
6862
6863  offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
6864    CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
6865  if (clStatus != CL_SUCCESS)
6866  {
6867    (void) ThrowMagickException(exception, GetMagickModule(),
6868      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
6869    goto cleanup;
6870  }
6871  for (i = 0; i < width; i++)
6872  {
6873    offsetBufferPtr[2*i] = (int)offset[i].x;
6874    offsetBufferPtr[2*i+1] = (int)offset[i].y;
6875  }
6876  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
6877    NULL, NULL);
6878 if (clStatus != CL_SUCCESS)
6879  {
6880    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
6881      "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
6882    goto cleanup;
6883  }
6884
6885
6886 // get the OpenCL kernel
6887  motionBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
6888    "MotionBlur");
6889  if (motionBlurKernel == NULL)
6890  {
6891    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
6892      "AcquireOpenCLKernel failed.", "'%s'", ".");
6893    goto cleanup;
6894  }
6895
6896  // set the kernel arguments
6897  i = 0;
6898  clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
6899    (void *)&imageBuffer);
6900  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
6901    (void *)&filteredImageBuffer);
6902  imageWidth = image->columns;
6903  imageHeight = image->rows;
6904  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
6905    &imageWidth);
6906  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
6907    &imageHeight);
6908  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
6909    (void *)&imageKernelBuffer);
6910  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
6911    &width);
6912  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
6913    (void *)&offsetBuffer);
6914
6915  GetPixelInfo(image,&bias);
6916  biasPixel.s[0] = bias.red;
6917  biasPixel.s[1] = bias.green;
6918  biasPixel.s[2] = bias.blue;
6919  biasPixel.s[3] = bias.alpha;
6920  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
6921
6922  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &channel);
6923  matte = (image->alpha_trait==BlendPixelTrait)?1:0;
6924  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
6925  if (clStatus != CL_SUCCESS)
6926  {
6927    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
6928      "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6929    goto cleanup;
6930  }
6931
6932  // launch the kernel
6933  local_work_size[0] = 16;
6934  local_work_size[1] = 16;
6935  global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
6936                                image->columns,local_work_size[0]);
6937  global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
6938                                image->rows,local_work_size[1]);
6939  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
6940    global_work_size, local_work_size, 0, NULL, NULL);
6941
6942  if (clStatus != CL_SUCCESS)
6943  {
6944    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
6945      "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6946    goto cleanup;
6947  }
6948  clEnv->library->clFlush(queue);
6949
6950  if (ALIGNED(filteredPixels,CLPixelPacket))
6951  {
6952    length = image->columns * image->rows;
6953    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
6954      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
6955      NULL, &clStatus);
6956  }
6957  else
6958  {
6959    length = image->columns * image->rows;
6960    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
6961      length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
6962  }
6963  if (clStatus != CL_SUCCESS)
6964  {
6965    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
6966      "Reading output image from CL buffer failed.", "'%s'", ".");
6967    goto cleanup;
6968  }
6969  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
6970
6971cleanup:
6972
6973  image_view=DestroyCacheView(image_view);
6974  if (filteredImage_view != NULL)
6975    filteredImage_view=DestroyCacheView(filteredImage_view);
6976
6977  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
6978  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
6979  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
6980  if (motionBlurKernel!=NULL)  RelinquishOpenCLKernel(clEnv, motionBlurKernel);
6981  if (queue != NULL)           RelinquishOpenCLCommandQueue(clEnv, queue);
6982  if (outputReady == MagickFalse && filteredImage != NULL)
6983    filteredImage=DestroyImage(filteredImage);
6984
6985  return(filteredImage);
6986}
6987
6988MagickExport Image *AccelerateMotionBlurImage(const Image *image,
6989  const ChannelType channel,const double* kernel,const size_t width,
6990  const OffsetInfo *offset,ExceptionInfo *exception)
6991{
6992  Image
6993    *filteredImage;
6994
6995  assert(image != NULL);
6996  assert(kernel != (double *) NULL);
6997  assert(offset != (OffsetInfo *) NULL);
6998  assert(exception != (ExceptionInfo *) NULL);
6999
7000  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
7001      (checkAccelerateCondition(image, channel) == MagickFalse))
7002    return NULL;
7003
7004  filteredImage=ComputeMotionBlurImage(image, channel, kernel, width,
7005    offset, exception);
7006  return(filteredImage);
7007}
7008
7009static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv,
7010  cl_command_queue queue,cl_mem imageBuffer,const unsigned int inputWidth,
7011  const unsigned int inputHeight,const unsigned int matte,
7012  const ChannelType channel,const CompositeOperator compose,
7013  const cl_mem compositeImageBuffer,const unsigned int compositeWidth,
7014  const unsigned int compositeHeight,const float destination_dissolve,
7015  const float source_dissolve,ExceptionInfo *magick_unused(exception))
7016{
7017  cl_int
7018    clStatus;
7019
7020  cl_kernel
7021    compositeKernel;
7022
7023  int
7024    k;
7025
7026  size_t
7027    global_work_size[2],
7028    local_work_size[2];
7029
7030  unsigned int
7031    composeOp;
7032
7033  magick_unreferenced(exception);
7034
7035  compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
7036    "Composite");
7037
7038  k = 0;
7039  clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&imageBuffer);
7040  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth);
7041  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight);
7042  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer);
7043  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth);
7044  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight);
7045  composeOp = (unsigned int)compose;
7046  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp);
7047  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel);
7048  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte);
7049  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve);
7050  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve);
7051
7052  if (clStatus!=CL_SUCCESS)
7053    return MagickFalse;
7054
7055  local_work_size[0] = 64;
7056  local_work_size[1] = 1;
7057
7058  global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth,
7059    local_work_size[0]);
7060  global_work_size[1] = inputHeight;
7061  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL,
7062    global_work_size, local_work_size, 0, NULL, NULL);
7063
7064
7065  RelinquishOpenCLKernel(clEnv, compositeKernel);
7066
7067  return((clStatus==CL_SUCCESS) ? MagickTrue : MagickFalse);
7068}
7069
7070static MagickBooleanType ComputeCompositeImage(Image *image,
7071  const ChannelType channel,const CompositeOperator compose,
7072  const Image *compositeImage,const ssize_t magick_unused(x_offset),
7073  const ssize_t magick_unused(y_offset),const float destination_dissolve,
7074  const float source_dissolve,ExceptionInfo *exception)
7075{
7076  CacheView
7077    *image_view;
7078
7079  cl_command_queue
7080    queue;
7081
7082  cl_context
7083    context;
7084
7085  cl_int
7086    clStatus;
7087
7088  cl_mem_flags
7089    mem_flags;
7090
7091  cl_mem
7092    compositeImageBuffer,
7093    imageBuffer;
7094
7095  const void
7096    *composePixels;
7097
7098  MagickBooleanType
7099    outputReady,
7100    status;
7101
7102  MagickCLEnv
7103    clEnv;
7104
7105  MagickSizeType
7106    length;
7107
7108  void
7109    *inputPixels;
7110
7111  magick_unreferenced(x_offset);
7112  magick_unreferenced(y_offset);
7113
7114  status = MagickFalse;
7115  outputReady = MagickFalse;
7116  composePixels = NULL;
7117  imageBuffer = NULL;
7118  compositeImageBuffer = NULL;
7119
7120  clEnv = GetDefaultOpenCLEnv();
7121  context = GetOpenCLContext(clEnv);
7122  queue = AcquireOpenCLCommandQueue(clEnv);
7123
7124  /* Create and initialize OpenCL buffers. */
7125  image_view=AcquireAuthenticCacheView(image,exception);
7126  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
7127  if (inputPixels == (void *) NULL)
7128  {
7129    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
7130      "UnableToReadPixelCache.","`%s'",image->filename);
7131    goto cleanup;
7132  }
7133
7134  /* If the host pointer is aligned to the size of CLPixelPacket,
7135     then use the host buffer directly from the GPU; otherwise,
7136     create a buffer on the GPU and copy the data over */
7137  if (ALIGNED(inputPixels,CLPixelPacket))
7138  {
7139    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
7140  }
7141  else
7142  {
7143    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
7144  }
7145  /* create a CL buffer from image pixel buffer */
7146  length = image->columns * image->rows;
7147  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
7148    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
7149  if (clStatus != CL_SUCCESS)
7150  {
7151    (void) OpenCLThrowMagickException(exception, GetMagickModule(),
7152      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
7153    goto cleanup;
7154  }
7155
7156
7157  /* Create and initialize OpenCL buffers. */
7158  composePixels = AcquirePixelCachePixels(compositeImage, &length, exception);
7159  if (composePixels == (void *) NULL)
7160  {
7161    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
7162      "UnableToReadPixelCache.","`%s'",compositeImage->filename);
7163    goto cleanup;
7164  }
7165
7166  /* If the host pointer is aligned to the size of CLPixelPacket,
7167     then use the host buffer directly from the GPU; otherwise,
7168     create a buffer on the GPU and copy the data over */
7169  if (ALIGNED(composePixels,CLPixelPacket))
7170  {
7171    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
7172  }
7173  else
7174  {
7175    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
7176  }
7177  /* create a CL buffer from image pixel buffer */
7178  length = compositeImage->columns * compositeImage->rows;
7179  compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
7180    length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus);
7181  if (clStatus != CL_SUCCESS)
7182  {
7183    (void) OpenCLThrowMagickException(exception, GetMagickModule(),
7184      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
7185    goto cleanup;
7186  }
7187
7188  status = LaunchCompositeKernel(clEnv,queue,imageBuffer,
7189           (unsigned int) image->columns,
7190           (unsigned int) image->rows,
7191           (unsigned int) (image->alpha_trait==BlendPixelTrait) ? 1 : 0,
7192           channel, compose, compositeImageBuffer,
7193           (unsigned int) compositeImage->columns,
7194           (unsigned int) compositeImage->rows,
7195           destination_dissolve,source_dissolve,
7196           exception);
7197
7198  if (status==MagickFalse)
7199    goto cleanup;
7200
7201  length = image->columns * image->rows;
7202  if (ALIGNED(inputPixels,CLPixelPacket))
7203  {
7204    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE,
7205      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
7206      NULL, &clStatus);
7207  }
7208  else
7209  {
7210    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0,
7211      length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
7212  }
7213  if (clStatus==CL_SUCCESS)
7214    outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
7215
7216cleanup:
7217
7218  image_view=DestroyCacheView(image_view);
7219  if (imageBuffer!=NULL)      clEnv->library->clReleaseMemObject(imageBuffer);
7220  if (compositeImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(compositeImageBuffer);
7221  if (queue != NULL)               RelinquishOpenCLCommandQueue(clEnv, queue);
7222
7223  return(outputReady);
7224}
7225
7226MagickExport MagickBooleanType AccelerateCompositeImage(Image *image,
7227  const ChannelType channel,const CompositeOperator compose,
7228  const Image *composite,const ssize_t x_offset,const ssize_t y_offset,
7229  const float destination_dissolve,const float source_dissolve,
7230  ExceptionInfo *exception)
7231{
7232  MagickBooleanType
7233    status;
7234
7235  assert(image != NULL);
7236  assert(exception != (ExceptionInfo *) NULL);
7237
7238  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
7239      (checkAccelerateCondition(image, channel) == MagickFalse))
7240    return(MagickFalse);
7241
7242  /* only support zero offset and
7243     images with the size for now */
7244  if (x_offset!=0
7245    || y_offset!=0
7246    || image->columns!=composite->columns
7247    || image->rows!=composite->rows)
7248    return MagickFalse;
7249
7250  switch(compose) {
7251  case ColorDodgeCompositeOp:
7252  case BlendCompositeOp:
7253    break;
7254  default:
7255    // unsupported compose operator, quit
7256    return MagickFalse;
7257  };
7258
7259  status = ComputeCompositeImage(image,channel,compose,composite,
7260    x_offset,y_offset,destination_dissolve,source_dissolve,exception);
7261
7262  return(status);
7263}
7264
7265#else  /* MAGICKCORE_OPENCL_SUPPORT  */
7266
7267MagickExport Image *AccelerateConvolveImageChannel(
7268  const Image *magick_unused(image),const ChannelType magick_unused(channel),
7269  const KernelInfo *magick_unused(kernel),
7270  ExceptionInfo *magick_unused(exception))
7271{
7272  magick_unreferenced(image);
7273  magick_unreferenced(channel);
7274  magick_unreferenced(kernel);
7275  magick_unreferenced(exception);
7276
7277  return NULL;
7278}
7279
7280MagickExport MagickBooleanType AccelerateFunctionImage(
7281  Image *magick_unused(image),const ChannelType magick_unused(channel),
7282  const MagickFunction magick_unused(function),
7283  const size_t magick_unused(number_parameters),
7284  const double *magick_unused(parameters),
7285  ExceptionInfo *magick_unused(exception))
7286{
7287  magick_unreferenced(image);
7288  magick_unreferenced(channel);
7289  magick_unreferenced(function);
7290  magick_unreferenced(number_parameters);
7291  magick_unreferenced(parameters);
7292  magick_unreferenced(exception);
7293
7294  return MagickFalse;
7295}
7296
7297MagickExport Image *AccelerateBlurImage(const Image *magick_unused(image),
7298  const ChannelType magick_unused(channel),const double magick_unused(radius),
7299  const double magick_unused(sigma),ExceptionInfo *magick_unused(exception))
7300{
7301  magick_unreferenced(image);
7302  magick_unreferenced(channel);
7303  magick_unreferenced(radius);
7304  magick_unreferenced(sigma);
7305  magick_unreferenced(exception);
7306
7307  return NULL;
7308}
7309
7310MagickExport Image *AccelerateRotationalBlurImage(
7311  const Image *magick_unused(image),const ChannelType magick_unused(channel),
7312  const double magick_unused(angle),ExceptionInfo *magick_unused(exception))
7313{
7314  magick_unreferenced(image);
7315  magick_unreferenced(channel);
7316  magick_unreferenced(angle);
7317  magick_unreferenced(exception);
7318
7319  return NULL;
7320}
7321
7322
7323MagickExport Image *AccelerateUnsharpMaskImage(
7324  const Image *magick_unused(image),const ChannelType magick_unused(channel),
7325  const double magick_unused(radius),const double magick_unused(sigma),
7326  const double magick_unused(gain),const double magick_unused(threshold),
7327  ExceptionInfo *magick_unused(exception))
7328{
7329  magick_unreferenced(image);
7330  magick_unreferenced(channel);
7331  magick_unreferenced(radius);
7332  magick_unreferenced(sigma);
7333  magick_unreferenced(gain);
7334  magick_unreferenced(threshold);
7335  magick_unreferenced(exception);
7336
7337  return NULL;
7338}
7339
7340MagickExport
7341MagickBooleanType AccelerateCompositeImage(Image *image,
7342  const ChannelType channel,const CompositeOperator compose,
7343  const Image *composite,const ssize_t x_offset,const ssize_t y_offset,
7344  const float destination_dissolve,const float source_dissolve,
7345  ExceptionInfo *exception)
7346{
7347  magick_unreferenced(image);
7348  magick_unreferenced(channel);
7349  magick_unreferenced(compose);
7350  magick_unreferenced(composite);
7351  magick_unreferenced(x_offset);
7352  magick_unreferenced(y_offset);
7353  magick_unreferenced(destination_dissolve);
7354  magick_unreferenced(source_dissolve);
7355  magick_unreferenced(exception);
7356
7357  return MagickFalse;
7358}
7359
7360
7361MagickExport MagickBooleanType AccelerateContrastImage(
7362  Image* magick_unused(image),const MagickBooleanType magick_unused(sharpen),
7363  ExceptionInfo* magick_unused(exception))
7364{
7365  magick_unreferenced(image);
7366  magick_unreferenced(sharpen);
7367  magick_unreferenced(exception);
7368
7369  return MagickFalse;
7370}
7371
7372MagickExport MagickBooleanType AccelerateContrastStretchImageChannel(
7373    Image * image, const ChannelType channel, const double black_point, const double white_point,
7374    ExceptionInfo* magick_unused(exception))
7375{
7376  magick_unreferenced(image);
7377  magick_unreferenced(channel);
7378  magick_unreferenced(black_point);
7379  magick_unreferenced(white_point);
7380  magick_unreferenced(exception);
7381
7382  return MagickFalse;
7383}
7384
7385MagickExport MagickBooleanType AccelerateEqualizeImage(
7386  Image* magick_unused(image), const ChannelType magick_unused(channel),
7387  ExceptionInfo* magick_unused(exception))
7388{
7389  magick_unreferenced(image);
7390  magick_unreferenced(channel);
7391  magick_unreferenced(exception);
7392
7393  return MagickFalse;
7394}
7395
7396MagickExport Image *AccelerateDespeckleImage(const Image* magick_unused(image),
7397  ExceptionInfo* magick_unused(exception))
7398{
7399  magick_unreferenced(image);
7400  magick_unreferenced(exception);
7401
7402  return NULL;
7403}
7404
7405MagickExport Image *AccelerateResizeImage(const Image* magick_unused(image),
7406  const size_t magick_unused(resizedColumns),
7407  const size_t magick_unused(resizedRows),
7408  const ResizeFilter* magick_unused(resizeFilter),
7409  ExceptionInfo *magick_unused(exception))
7410{
7411  magick_unreferenced(image);
7412  magick_unreferenced(resizedColumns);
7413  magick_unreferenced(resizedRows);
7414  magick_unreferenced(resizeFilter);
7415  magick_unreferenced(exception);
7416
7417  return NULL;
7418}
7419
7420MagickExport
7421MagickBooleanType AccelerateModulateImage(
7422  Image* image, double percent_brightness, double percent_hue,
7423  double percent_saturation, ColorspaceType colorspace, ExceptionInfo* exception)
7424{
7425  magick_unreferenced(image);
7426  magick_unreferenced(percent_brightness);
7427  magick_unreferenced(percent_hue);
7428  magick_unreferenced(percent_saturation);
7429  magick_unreferenced(colorspace);
7430  magick_unreferenced(exception);
7431  return(MagickFalse);
7432}
7433
7434MagickExport
7435MagickBooleanType AccelerateNegateImageChannel(
7436  Image* image, const ChannelType channel, const MagickBooleanType grayscale, ExceptionInfo* exception)
7437{
7438  magick_unreferenced(image);
7439  magick_unreferenced(channel);
7440  magick_unreferenced(grayscale);
7441  magick_unreferenced(exception);
7442  return(MagickFalse);
7443}
7444
7445MagickExport
7446MagickBooleanType AccelerateGrayscaleImage(
7447  Image* image, const PixelIntensityMethod method, ExceptionInfo* exception)
7448{
7449  magick_unreferenced(image);
7450  magick_unreferenced(method);
7451  magick_unreferenced(exception);
7452  return(MagickFalse);
7453}
7454
7455MagickExport Image *AccelerateAddNoiseImage(const Image *image,
7456  const ChannelType channel, const NoiseType noise_type,ExceptionInfo *exception)
7457{
7458  magick_unreferenced(image);
7459  magick_unreferenced(channel);
7460  magick_unreferenced(noise_type);
7461  magick_unreferenced(exception);
7462  return NULL;
7463}
7464
7465
7466MagickExport MagickBooleanType AccelerateRandomImage(Image* image, ExceptionInfo* exception)
7467{
7468  magick_unreferenced(image);
7469  magick_unreferenced(exception);
7470  return MagickFalse;
7471}
7472
7473MagickExport
7474Image* AccelerateMotionBlurImage(const Image *image, const ChannelType channel,
7475                                const double* kernel, const size_t width,
7476                                const OffsetInfo *offset,
7477                                ExceptionInfo *exception)
7478{
7479  magick_unreferenced(image);
7480  magick_unreferenced(channel);
7481  magick_unreferenced(kernel);
7482  magick_unreferenced(width);
7483  magick_unreferenced(offset);
7484  magick_unreferenced(exception);
7485  return NULL;
7486}
7487
7488#endif /* MAGICKCORE_OPENCL_SUPPORT */
7489
7490MagickExport MagickBooleanType AccelerateConvolveImage(
7491  const Image *magick_unused(image),const KernelInfo *magick_unused(kernel),
7492  Image *magick_unused(convolve_image),ExceptionInfo *magick_unused(exception))
7493{
7494  magick_unreferenced(image);
7495  magick_unreferenced(kernel);
7496  magick_unreferenced(convolve_image);
7497  magick_unreferenced(exception);
7498
7499  /* legacy, do not use */
7500  return(MagickFalse);
7501}
7502
7503