accelerate.c revision 7ce65e7125a4e1df1a274ce373c537a9df9c16cd
1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3%                                                                             %
4%                                                                             %
5%                                                                             %
6%     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7%    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8%    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9%    A   A   C       C      E      L      E      R R    A   A    T    E       %
10%    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11%                                                                             %
12%                                                                             %
13%                       MagickCore Acceleration Methods                       %
14%                                                                             %
15%                              Software Design                                %
16%                                  Cristy                                     %
17%                               SiuChi Chan                                   %
18%                               Guansong Zhang                                %
19%                               January 2010                                  %
20%                                                                             %
21%                                                                             %
22%  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
23%  dedicated to making software imaging solutions freely available.           %
24%                                                                             %
25%  You may not use this file except in compliance with the License.  You may  %
26%  obtain a copy of the License at                                            %
27%                                                                             %
28%    http://www.imagemagick.org/script/license.php                            %
29%                                                                             %
30%  Unless required by applicable law or agreed to in writing, software        %
31%  distributed under the License is distributed on an "AS IS" BASIS,          %
32%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
33%  See the License for the specific language governing permissions and        %
34%  limitations under the License.                                             %
35%                                                                             %
36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
37*/
38
39/*
40Include declarations.
41*/
42#include "MagickCore/studio.h"
43#include "MagickCore/accelerate.h"
44#include "MagickCore/accelerate-private.h"
45#include "MagickCore/artifact.h"
46#include "MagickCore/cache.h"
47#include "MagickCore/cache-private.h"
48#include "MagickCore/cache-view.h"
49#include "MagickCore/color-private.h"
50#include "MagickCore/delegate-private.h"
51#include "MagickCore/enhance.h"
52#include "MagickCore/exception.h"
53#include "MagickCore/exception-private.h"
54#include "MagickCore/gem.h"
55#include "MagickCore/hashmap.h"
56#include "MagickCore/image.h"
57#include "MagickCore/image-private.h"
58#include "MagickCore/list.h"
59#include "MagickCore/memory_.h"
60#include "MagickCore/monitor-private.h"
61#include "MagickCore/accelerate.h"
62#include "MagickCore/opencl.h"
63#include "MagickCore/opencl-private.h"
64#include "MagickCore/option.h"
65#include "MagickCore/pixel-accessor.h"
66#include "MagickCore/pixel-private.h"
67#include "MagickCore/prepress.h"
68#include "MagickCore/quantize.h"
69#include "MagickCore/quantum-private.h"
70#include "MagickCore/random_.h"
71#include "MagickCore/random-private.h"
72#include "MagickCore/registry.h"
73#include "MagickCore/resize.h"
74#include "MagickCore/resize-private.h"
75#include "MagickCore/semaphore.h"
76#include "MagickCore/splay-tree.h"
77#include "MagickCore/statistic.h"
78#include "MagickCore/string_.h"
79#include "MagickCore/string-private.h"
80#include "MagickCore/token.h"
81
82#ifdef MAGICKCORE_CLPERFMARKER
83#include "CLPerfMarker.h"
84#endif
85
/*
  Two-argument maximum / minimum selectors.  These are plain textual macros:
  the chosen operand is evaluated twice, so do not pass expressions that have
  side effects.  When both operands compare equal the first one is selected.
*/
#define MAGICK_MAX(x,y) \
  (((x) >= (y)) ? (x) : (y))
#define MAGICK_MIN(x,y) \
  (((x) <= (y)) ? (x) : (y))
88
89#if defined(MAGICKCORE_OPENCL_SUPPORT)
90
91/*
92  Define declarations.
93*/
/*
  ALIGNED(pointer,type) is true when the address held in `pointer` has none of
  the bits of (sizeof(type)-1) set.  Because the test is a bit mask rather
  than a modulo, it only expresses "multiple of sizeof(type)" when
  sizeof(type) is a power of two.
*/
#define ALIGNED(pointer,type) \
  ((((size_t) (pointer)) & (sizeof(type)-1)) == 0)
95
96/*
97  Static declarations.
98*/
99static const ResizeWeightingFunctionType supportedResizeWeighting[] =
100{
101  BoxWeightingFunction,
102  TriangleWeightingFunction,
103  HannWeightingFunction,
104  HammingWeightingFunction,
105  BlackmanWeightingFunction,
106  CubicBCWeightingFunction,
107  SincWeightingFunction,
108  SincFastWeightingFunction,
109  LastWeightingFunction
110};
111
112/*
113  Forward declarations.
114*/
115static Image *ComputeUnsharpMaskImageSingle(const Image *image,
116  const ChannelType channel,const double radius,const double sigma,
117  const double gain,const double threshold,int blurOnly, ExceptionInfo *exception);
118
119/*
120  Helper functions.
121*/
122static MagickBooleanType checkAccelerateCondition(const Image* image,
123  const ChannelType channel)
124{
125  /* check if the image's colorspace is supported */
126  if (image->colorspace != RGBColorspace &&
127      image->colorspace != sRGBColorspace &&
128      image->colorspace != GRAYColorspace)
129    return(MagickFalse);
130
131  /* check if the channel is supported */
132  if (((channel & RedChannel) == 0) ||
133      ((channel & GreenChannel) == 0) ||
134      ((channel & BlueChannel) == 0))
135    return(MagickFalse);
136
137  /* check if the virtual pixel method is compatible with the OpenCL implementation */
138  if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
139      (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
140    return(MagickFalse);
141
142  /* check if the image has read / write mask */
143  if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
144    return(MagickFalse);
145
146  /* check if pixel order is RGBA */
147  if (GetPixelChannelOffset(image,RedPixelChannel) != 0 ||
148      GetPixelChannelOffset(image,GreenPixelChannel) != 1 ||
149      GetPixelChannelOffset(image,BluePixelChannel) != 2 ||
150      GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
151    return(MagickFalse);
152
153  /* check if all channels are available */
154  if (((GetPixelRedTraits(image) & UpdatePixelTrait) == 0) ||
155      ((GetPixelGreenTraits(image) & UpdatePixelTrait) == 0) ||
156      ((GetPixelBlueTraits(image) & UpdatePixelTrait) == 0) ||
157      (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
158    return(MagickFalse);
159
160  return(MagickTrue);
161}
162
163static MagickBooleanType checkHistogramCondition(Image *image,
164  const ChannelType channel)
165{
166  /* ensure this is the only pass get in for now. */
167  if ((channel & SyncChannels) == 0)
168    return MagickFalse;
169
170  if (image->intensity == Rec601LuminancePixelIntensityMethod ||
171      image->intensity == Rec709LuminancePixelIntensityMethod)
172    return MagickFalse;
173
174  if (image->colorspace != sRGBColorspace)
175    return MagickFalse;
176
177  return MagickTrue;
178}
179
180static MagickBooleanType checkOpenCLEnvironment(ExceptionInfo* exception)
181{
182  MagickBooleanType
183    flag;
184
185  MagickCLEnv
186    clEnv;
187
188  clEnv=GetDefaultOpenCLEnv();
189
190  GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED,
191    sizeof(MagickBooleanType),&flag,exception);
192  if (flag != MagickFalse)
193    return(MagickFalse);
194
195  GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_INITIALIZED,
196    sizeof(MagickBooleanType),&flag,exception);
197  if (flag == MagickFalse)
198    {
199      if (InitOpenCLEnv(clEnv,exception) == MagickFalse)
200        return(MagickFalse);
201
202      GetMagickOpenCLEnvParam(clEnv,MAGICK_OPENCL_ENV_PARAM_OPENCL_DISABLED,
203        sizeof(MagickBooleanType),&flag,exception);
204      if (flag != MagickFalse)
205        return(MagickFalse);
206    }
207
208  return(MagickTrue);
209}
210
/*
  padGlobalWorkgroupSizeToLocalWorkgroupSize() rounds orgGlobalSize up to the
  next multiple of localGroupSize and returns the result.  A value that is
  already a multiple is returned unchanged.

  A localGroupSize of zero has no meaningful multiple; in that case the
  original size is returned as-is instead of dividing by zero.  The addition
  is done in unsigned int, so sizes within localGroupSize-1 of UINT_MAX wrap.
*/
static inline unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
  const unsigned int orgGlobalSize,const unsigned int localGroupSize)
{
  if (localGroupSize == 0)
    return(orgGlobalSize);
  return((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
}
218
219static MagickBooleanType splitImage(const Image* image)
220{
221  MagickBooleanType
222    split;
223
224  MagickCLEnv
225    clEnv;
226
227  unsigned long
228    allocSize,
229    tempSize;
230
231  clEnv=GetDefaultOpenCLEnv();
232
233  allocSize=GetOpenCLDeviceMaxMemAllocSize(clEnv);
234  tempSize=(unsigned long) (image->columns * image->rows * 4 * 4);
235
236  split = ((tempSize > allocSize) ? MagickTrue : MagickFalse);
237  return(split);
238}
239
240/*
241%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
242%                                                                             %
243%                                                                             %
244%                                                                             %
245%     A c c e l e r a t e A d d N o i s e I m a g e                           %
246%                                                                             %
247%                                                                             %
248%                                                                             %
249%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
250*/
251
252static Image *ComputeAddNoiseImage(const Image *image,
253  const ChannelType channel,const NoiseType noise_type,
254  ExceptionInfo *exception)
255{
256  CacheView
257    *filteredImage_view,
258    *image_view;
259
260  cl_command_queue
261    queue;
262
263  cl_context
264    context;
265
266  cl_int
267    inputPixelCount,
268    pixelsPerWorkitem,
269    clStatus;
270
271  cl_uint
272    seed0,
273    seed1;
274
275  cl_kernel
276    addNoiseKernel;
277
278  cl_event
279    event;
280
281  cl_mem_flags
282    mem_flags;
283
284  cl_mem
285    filteredImageBuffer,
286    imageBuffer;
287
288  const char
289    *option;
290
291  const void
292    *inputPixels;
293
294  float
295    attenuate;
296
297  MagickBooleanType
298    outputReady;
299
300  MagickCLEnv
301    clEnv;
302
303  MagickSizeType
304    length;
305
306  Image
307    *filteredImage;
308
309  RandomInfo
310    **magick_restrict random_info;
311
312  size_t
313    global_work_size[1],
314    local_work_size[1];
315
316  unsigned int
317    k,
318    numRandomNumberPerPixel;
319
320#if defined(MAGICKCORE_OPENMP_SUPPORT)
321  unsigned long
322    key;
323#endif
324
325  void
326    *filteredPixels,
327    *hostPtr;
328
329  outputReady = MagickFalse;
330  clEnv = NULL;
331  inputPixels = NULL;
332  filteredImage = NULL;
333  filteredImage_view = NULL;
334  filteredPixels = NULL;
335  context = NULL;
336  imageBuffer = NULL;
337  filteredImageBuffer = NULL;
338  queue = NULL;
339  addNoiseKernel = NULL;
340
341  clEnv = GetDefaultOpenCLEnv();
342  context = GetOpenCLContext(clEnv);
343  queue = AcquireOpenCLCommandQueue(clEnv);
344
345  image_view=AcquireVirtualCacheView(image,exception);
346  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
347  if (inputPixels == (void *) NULL)
348  {
349    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
350    goto cleanup;
351  }
352
353  if (ALIGNED(inputPixels,CLPixelPacket))
354  {
355    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
356  }
357  else
358  {
359    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
360  }
361  /* create a CL buffer from image pixel buffer */
362  length = image->columns * image->rows;
363  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
364  if (clStatus != CL_SUCCESS)
365  {
366    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
367    goto cleanup;
368  }
369
370
371  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
372  assert(filteredImage != NULL);
373  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
374  {
375    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
376    goto cleanup;
377  }
378  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
379  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
380  if (filteredPixels == (void *) NULL)
381  {
382    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
383    goto cleanup;
384  }
385
386  if (ALIGNED(filteredPixels,CLPixelPacket))
387  {
388    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
389    hostPtr = filteredPixels;
390  }
391  else
392  {
393    mem_flags = CL_MEM_WRITE_ONLY;
394    hostPtr = NULL;
395  }
396  /* create a CL buffer from image pixel buffer */
397  length = image->columns * image->rows;
398  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
399  if (clStatus != CL_SUCCESS)
400  {
401    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
402    goto cleanup;
403  }
404
405  /* find out how many random numbers needed by pixel */
406  numRandomNumberPerPixel = 0;
407  {
408    unsigned int numRandPerChannel = 0;
409    switch (noise_type)
410    {
411    case UniformNoise:
412    case ImpulseNoise:
413    case LaplacianNoise:
414    case RandomNoise:
415    default:
416      numRandPerChannel = 1;
417      break;
418    case GaussianNoise:
419    case MultiplicativeGaussianNoise:
420    case PoissonNoise:
421      numRandPerChannel = 2;
422      break;
423    };
424
425    if ((channel & RedChannel) != 0)
426      numRandomNumberPerPixel+=numRandPerChannel;
427    if ((channel & GreenChannel) != 0)
428      numRandomNumberPerPixel+=numRandPerChannel;
429    if ((channel & BlueChannel) != 0)
430      numRandomNumberPerPixel+=numRandPerChannel;
431    if ((channel & OpacityChannel) != 0)
432      numRandomNumberPerPixel+=numRandPerChannel;
433  }
434
435  /* set up the random number generators */
436  attenuate=1.0;
437  option=GetImageArtifact(image,"attenuate");
438  if (option != (char *) NULL)
439    attenuate=StringToDouble(option,(char **) NULL);
440  random_info=AcquireRandomInfoThreadSet();
441#if defined(MAGICKCORE_OPENMP_SUPPORT)
442  key=GetRandomSecretKey(random_info[0]);
443  (void) key;
444#endif
445
446  addNoiseKernel = AcquireOpenCLKernel(clEnv,MAGICK_OPENCL_ACCELERATE,"AddNoise");
447
448  {
449    cl_uint computeUnitCount;
450    cl_uint workItemCount;
451    clEnv->library->clGetDeviceInfo(clEnv->device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &computeUnitCount, NULL);
452    workItemCount = computeUnitCount * 2 * 256;			// 256 work items per group, 2 groups per CU
453    inputPixelCount = (cl_int) (image->columns * image->rows);
454    pixelsPerWorkitem = (inputPixelCount + workItemCount - 1) / workItemCount;
455    pixelsPerWorkitem = ((pixelsPerWorkitem + 3) / 4) * 4;
456
457    local_work_size[0] = 256;
458    global_work_size[0] = workItemCount;
459  }
460  {
461    RandomInfo* randomInfo = AcquireRandomInfo();
462	const unsigned long* s = GetRandomInfoSeed(randomInfo);
463	seed0 = s[0];
464	GetPseudoRandomValue(randomInfo);
465	seed1 = s[0];
466	randomInfo = DestroyRandomInfo(randomInfo);
467  }
468
469  k = 0;
470  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&imageBuffer);
471  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_mem),(void *)&filteredImageBuffer);
472  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&inputPixelCount);
473  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
474  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(ChannelType),(void *)&channel);
475  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(NoiseType),(void *)&noise_type);
476  attenuate=1.0f;
477  option=GetImageArtifact(image,"attenuate");
478  if (option != (char *) NULL)
479    attenuate=(float)StringToDouble(option,(char **) NULL);
480  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(float),(void *)&attenuate);
481  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&seed0);
482  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(cl_uint),(void *)&seed1);
483  clEnv->library->clSetKernelArg(addNoiseKernel,k++,sizeof(unsigned int),(void *)&numRandomNumberPerPixel);
484
485  clEnv->library->clEnqueueNDRangeKernel(queue,addNoiseKernel,1,NULL,global_work_size,NULL,0,NULL,&event);
486
487  RecordProfileData(clEnv,AddNoiseKernel,event);
488  clEnv->library->clReleaseEvent(event);
489
490  if (ALIGNED(filteredPixels,CLPixelPacket))
491  {
492    length = image->columns * image->rows;
493    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
494  }
495  else
496  {
497    length = image->columns * image->rows;
498    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
499  }
500  if (clStatus != CL_SUCCESS)
501  {
502    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
503    goto cleanup;
504  }
505
506  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
507
508cleanup:
509  OpenCLLogException(__FUNCTION__,__LINE__,exception);
510
511  image_view=DestroyCacheView(image_view);
512  if (filteredImage_view != NULL)
513    filteredImage_view=DestroyCacheView(filteredImage_view);
514
515  if (queue!=NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
516  if (addNoiseKernel!=NULL)         RelinquishOpenCLKernel(clEnv, addNoiseKernel);
517  if (imageBuffer!=NULL)		    clEnv->library->clReleaseMemObject(imageBuffer);
518  if (filteredImageBuffer!=NULL)	  clEnv->library->clReleaseMemObject(filteredImageBuffer);
519  if (outputReady == MagickFalse && filteredImage != NULL)
520    filteredImage=DestroyImage(filteredImage);
521
522  return(filteredImage);
523}
524
525MagickExport Image *AccelerateAddNoiseImage(const Image *image,
526  const ChannelType channel,const NoiseType noise_type,
527  ExceptionInfo *exception)
528{
529  Image
530    *filteredImage;
531
532  assert(image != NULL);
533  assert(exception != (ExceptionInfo *) NULL);
534
535  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
536      (checkAccelerateCondition(image, channel) == MagickFalse))
537    return NULL;
538
539  filteredImage = ComputeAddNoiseImage(image,channel,noise_type,exception);
540
541  return(filteredImage);
542}
543
544/*
545%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
546%                                                                             %
547%                                                                             %
548%                                                                             %
549%     A c c e l e r a t e B l u r I m a g e                                   %
550%                                                                             %
551%                                                                             %
552%                                                                             %
553%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
554*/
555
556static Image *ComputeBlurImage(const Image* image,const ChannelType channel,
557  const double radius,const double sigma,ExceptionInfo *exception)
558{
559  CacheView
560    *filteredImage_view,
561    *image_view;
562
563  char
564    geometry[MagickPathExtent];
565
566  cl_command_queue
567    queue;
568
569  cl_context
570    context;
571
572  cl_int
573    clStatus;
574
575  cl_kernel
576    blurColumnKernel,
577    blurRowKernel;
578
579  cl_event
580    event;
581
582  cl_mem
583    filteredImageBuffer,
584    imageBuffer,
585    imageKernelBuffer,
586    tempImageBuffer;
587
588  cl_mem_flags
589    mem_flags;
590
591  const void
592    *inputPixels;
593
594  float
595    *kernelBufferPtr;
596
597  Image
598    *filteredImage;
599
600  MagickBooleanType
601    outputReady;
602
603  MagickCLEnv
604    clEnv;
605
606  MagickSizeType
607    length;
608
609  KernelInfo
610    *kernel;
611
612  unsigned int
613    i,
614    imageColumns,
615    imageRows,
616    kernelWidth;
617
618  void
619    *filteredPixels,
620    *hostPtr;
621
622  context = NULL;
623  filteredImage = NULL;
624  filteredImage_view = NULL;
625  imageBuffer = NULL;
626  tempImageBuffer = NULL;
627  filteredImageBuffer = NULL;
628  imageKernelBuffer = NULL;
629  blurRowKernel = NULL;
630  blurColumnKernel = NULL;
631  queue = NULL;
632  kernel = NULL;
633
634  outputReady = MagickFalse;
635
636  clEnv = GetDefaultOpenCLEnv();
637  context = GetOpenCLContext(clEnv);
638  queue = AcquireOpenCLCommandQueue(clEnv);
639
640  /* Create and initialize OpenCL buffers. */
641  {
642    image_view=AcquireVirtualCacheView(image,exception);
643    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
644    if (inputPixels == (const void *) NULL)
645    {
646      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
647      goto cleanup;
648    }
649    /* If the host pointer is aligned to the size of CLPixelPacket,
650     then use the host buffer directly from the GPU; otherwise,
651     create a buffer on the GPU and copy the data over */
652    if (ALIGNED(inputPixels,CLPixelPacket))
653    {
654      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
655    }
656    else
657    {
658      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
659    }
660    /* create a CL buffer from image pixel buffer */
661    length = image->columns * image->rows;
662    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
663    if (clStatus != CL_SUCCESS)
664    {
665      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
666      goto cleanup;
667    }
668  }
669
670  /* create output */
671  {
672    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
673    assert(filteredImage != NULL);
674    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
675    {
676      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
677      goto cleanup;
678    }
679    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
680    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
681    if (filteredPixels == (void *) NULL)
682    {
683      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
684      goto cleanup;
685    }
686
687    if (ALIGNED(filteredPixels,CLPixelPacket))
688    {
689      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
690      hostPtr = filteredPixels;
691    }
692    else
693    {
694      mem_flags = CL_MEM_WRITE_ONLY;
695      hostPtr = NULL;
696    }
697    /* create a CL buffer from image pixel buffer */
698    length = image->columns * image->rows;
699    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
700    if (clStatus != CL_SUCCESS)
701    {
702      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
703      goto cleanup;
704    }
705  }
706
707  /* create processing kernel */
708  {
709    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
710    kernel=AcquireKernelInfo(geometry,exception);
711    if (kernel == (KernelInfo *) NULL)
712    {
713      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
714      goto cleanup;
715    }
716
717    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
718    if (clStatus != CL_SUCCESS)
719    {
720      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
721      goto cleanup;
722    }
723    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
724    if (clStatus != CL_SUCCESS)
725    {
726      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
727      goto cleanup;
728    }
729
730    for (i = 0; i < kernel->width; i++)
731    {
732      kernelBufferPtr[i] = (float) kernel->values[i];
733    }
734
735    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
736    if (clStatus != CL_SUCCESS)
737    {
738      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
739      goto cleanup;
740    }
741  }
742
743  {
744
745    /* create temp buffer */
746    {
747      length = image->columns * image->rows;
748      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
749      if (clStatus != CL_SUCCESS)
750      {
751        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
752        goto cleanup;
753      }
754    }
755
756    /* get the OpenCL kernels */
757    {
758      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
759      if (blurRowKernel == NULL)
760      {
761        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
762        goto cleanup;
763      };
764
765      blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurColumn");
766      if (blurColumnKernel == NULL)
767      {
768        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
769        goto cleanup;
770      };
771    }
772
773    {
774      /* need logic to decide this value */
775      int chunkSize = 256;
776
777      {
778        imageColumns = (unsigned int) image->columns;
779        imageRows = (unsigned int) image->rows;
780
781        /* set the kernel arguments */
782        i = 0;
783        clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
784        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
785        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
786        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
787        kernelWidth = (unsigned int) kernel->width;
788        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
789        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
790        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
791        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
792        if (clStatus != CL_SUCCESS)
793        {
794          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
795          goto cleanup;
796        }
797      }
798
799      /* launch the kernel */
800      {
801        size_t gsize[2];
802        size_t wsize[2];
803
804        gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
805        gsize[1] = image->rows;
806        wsize[0] = chunkSize;
807        wsize[1] = 1;
808
809		clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
810        if (clStatus != CL_SUCCESS)
811        {
812          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
813          goto cleanup;
814        }
815        clEnv->library->clFlush(queue);
816        RecordProfileData(clEnv,BlurRowKernel,event);
817        clEnv->library->clReleaseEvent(event);
818      }
819    }
820
821    {
822      /* need logic to decide this value */
823      int chunkSize = 256;
824
825      {
826        imageColumns = (unsigned int) image->columns;
827        imageRows = (unsigned int) image->rows;
828
829        /* set the kernel arguments */
830        i = 0;
831        clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
832        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
833        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
834        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
835        kernelWidth = (unsigned int) kernel->width;
836        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
837        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
838        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
839        clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *) NULL);
840        if (clStatus != CL_SUCCESS)
841        {
842          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
843          goto cleanup;
844        }
845      }
846
847      /* launch the kernel */
848      {
849        size_t gsize[2];
850        size_t wsize[2];
851
852        gsize[0] = image->columns;
853        gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
854        wsize[0] = 1;
855        wsize[1] = chunkSize;
856
857		clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
858        if (clStatus != CL_SUCCESS)
859        {
860          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
861          goto cleanup;
862        }
863        clEnv->library->clFlush(queue);
864        RecordProfileData(clEnv,BlurColumnKernel,event);
865        clEnv->library->clReleaseEvent(event);
866      }
867    }
868
869  }
870
871  /* get result */
872  if (ALIGNED(filteredPixels,CLPixelPacket))
873  {
874    length = image->columns * image->rows;
875    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
876  }
877  else
878  {
879    length = image->columns * image->rows;
880    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
881  }
882  if (clStatus != CL_SUCCESS)
883  {
884    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
885    goto cleanup;
886  }
887
888  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
889
890cleanup:
891  OpenCLLogException(__FUNCTION__,__LINE__,exception);
892
893  image_view=DestroyCacheView(image_view);
894  if (filteredImage_view != NULL)
895    filteredImage_view=DestroyCacheView(filteredImage_view);
896
897  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
898  if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
899  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
900  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
901  if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
902  if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
903  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
904  if (kernel!=NULL)               DestroyKernelInfo(kernel);
905  if (outputReady == MagickFalse && filteredImage != NULL)
906    filteredImage=DestroyImage(filteredImage);
907  return(filteredImage);
908}
909
910static Image* ComputeBlurImageSection(const Image* image,
911  const ChannelType channel,const double radius,const double sigma,
912  ExceptionInfo *exception)
913{
914  CacheView
915    *filteredImage_view,
916    *image_view;
917
918  char
919    geometry[MagickPathExtent];
920
921  cl_command_queue
922    queue;
923
924  cl_int
925    clStatus;
926
927  cl_kernel
928    blurColumnKernel,
929    blurRowKernel;
930
931  cl_event
932    event;
933
934  cl_mem
935    imageBuffer,
936    tempImageBuffer,
937    filteredImageBuffer,
938    imageKernelBuffer;
939
940  cl_mem_flags
941    mem_flags;
942
943  cl_context
944    context;
945
946  const void
947    *inputPixels;
948
949  float
950    *kernelBufferPtr;
951
952  Image
953    *filteredImage;
954
955  KernelInfo
956    *kernel;
957
958  MagickBooleanType
959    outputReady;
960
961  MagickCLEnv
962    clEnv;
963
964  MagickSizeType
965    length;
966
967  unsigned int
968    i,
969    imageColumns,
970    imageRows,
971    kernelWidth;
972
973  void
974    *filteredPixels,
975    *hostPtr;
976
977  context = NULL;
978  filteredImage = NULL;
979  filteredImage_view = NULL;
980  imageBuffer = NULL;
981  tempImageBuffer = NULL;
982  filteredImageBuffer = NULL;
983  imageKernelBuffer = NULL;
984  blurRowKernel = NULL;
985  blurColumnKernel = NULL;
986  queue = NULL;
987  kernel = NULL;
988
989  outputReady = MagickFalse;
990
991  clEnv = GetDefaultOpenCLEnv();
992  context = GetOpenCLContext(clEnv);
993  queue = AcquireOpenCLCommandQueue(clEnv);
994
995  /* Create and initialize OpenCL buffers. */
996  {
997    image_view=AcquireVirtualCacheView(image,exception);
998    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
999    if (inputPixels == (const void *) NULL)
1000    {
1001      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1002      goto cleanup;
1003    }
1004    /* If the host pointer is aligned to the size of CLPixelPacket,
1005     then use the host buffer directly from the GPU; otherwise,
1006     create a buffer on the GPU and copy the data over */
1007    if (ALIGNED(inputPixels,CLPixelPacket))
1008    {
1009      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1010    }
1011    else
1012    {
1013      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1014    }
1015    /* create a CL buffer from image pixel buffer */
1016    length = image->columns * image->rows;
1017    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1018    if (clStatus != CL_SUCCESS)
1019    {
1020      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1021      goto cleanup;
1022    }
1023  }
1024
1025  /* create output */
1026  {
1027    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1028    assert(filteredImage != NULL);
1029    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1030    {
1031      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
1032      goto cleanup;
1033    }
1034    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1035    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1036    if (filteredPixels == (void *) NULL)
1037    {
1038      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1039      goto cleanup;
1040    }
1041
1042    if (ALIGNED(filteredPixels,CLPixelPacket))
1043    {
1044      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1045      hostPtr = filteredPixels;
1046    }
1047    else
1048    {
1049      mem_flags = CL_MEM_WRITE_ONLY;
1050      hostPtr = NULL;
1051    }
1052    /* create a CL buffer from image pixel buffer */
1053    length = image->columns * image->rows;
1054    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1055    if (clStatus != CL_SUCCESS)
1056    {
1057      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1058      goto cleanup;
1059    }
1060  }
1061
1062  /* create processing kernel */
1063  {
1064    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
1065    kernel=AcquireKernelInfo(geometry,exception);
1066    if (kernel == (KernelInfo *) NULL)
1067    {
1068      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "MemoryAllocationFailed.",".");
1069      goto cleanup;
1070    }
1071
1072    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernel->width * sizeof(float), NULL, &clStatus);
1073    if (clStatus != CL_SUCCESS)
1074    {
1075      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1076      goto cleanup;
1077    }
1078    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
1079    if (clStatus != CL_SUCCESS)
1080    {
1081      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1082      goto cleanup;
1083    }
1084
1085    for (i = 0; i < kernel->width; i++)
1086    {
1087      kernelBufferPtr[i] = (float) kernel->values[i];
1088    }
1089
1090    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
1091    if (clStatus != CL_SUCCESS)
1092    {
1093      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
1094      goto cleanup;
1095    }
1096  }
1097
1098  {
1099    unsigned int offsetRows;
1100    unsigned int sec;
1101
1102    /* create temp buffer */
1103    {
1104      length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2);
1105      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
1106      if (clStatus != CL_SUCCESS)
1107      {
1108        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1109        goto cleanup;
1110      }
1111    }
1112
1113    /* get the OpenCL kernels */
1114    {
1115      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurSectionRow");
1116      if (blurRowKernel == NULL)
1117      {
1118        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1119        goto cleanup;
1120      };
1121
1122      blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurSectionColumn");
1123      if (blurColumnKernel == NULL)
1124      {
1125        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1126        goto cleanup;
1127      };
1128    }
1129
1130    for (sec = 0; sec < 2; sec++)
1131    {
1132      {
1133        /* need logic to decide this value */
1134        int chunkSize = 256;
1135
1136        {
1137          imageColumns = (unsigned int) image->columns;
1138          if (sec == 0)
1139            imageRows = (unsigned int) (image->rows / 2 + (kernel->width-1) / 2);
1140          else
1141            imageRows = (unsigned int) ((image->rows - image->rows / 2) + (kernel->width-1) / 2);
1142
1143          offsetRows = (unsigned int) (sec * image->rows / 2);
1144
1145          kernelWidth = (unsigned int) kernel->width;
1146
1147          /* set the kernel arguments */
1148          i = 0;
1149          clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1150          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1151          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
1152          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1153          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1154          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1155          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1156          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
1157          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1158          clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
1159          if (clStatus != CL_SUCCESS)
1160          {
1161            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1162            goto cleanup;
1163          }
1164        }
1165
1166        /* launch the kernel */
1167        {
1168          size_t gsize[2];
1169          size_t wsize[2];
1170
1171          gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
1172          gsize[1] = imageRows;
1173          wsize[0] = chunkSize;
1174          wsize[1] = 1;
1175
1176		  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
1177          if (clStatus != CL_SUCCESS)
1178          {
1179            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1180            goto cleanup;
1181          }
1182          clEnv->library->clFlush(queue);
1183          RecordProfileData(clEnv,BlurRowKernel,event);
1184          clEnv->library->clReleaseEvent(event);
1185        }
1186      }
1187
1188      {
1189        /* need logic to decide this value */
1190        int chunkSize = 256;
1191
1192        {
1193          imageColumns = (unsigned int) image->columns;
1194          if (sec == 0)
1195            imageRows = (unsigned int) (image->rows / 2);
1196          else
1197            imageRows = (unsigned int) ((image->rows - image->rows / 2));
1198
1199          offsetRows = (unsigned int) (sec * image->rows / 2);
1200
1201          kernelWidth = (unsigned int) kernel->width;
1202
1203          /* set the kernel arguments */
1204          i = 0;
1205          clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
1206          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1207          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&channel);
1208          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
1209          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
1210          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
1211          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
1212          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernel->width),(void *) NULL);
1213          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
1214          clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
1215          if (clStatus != CL_SUCCESS)
1216          {
1217            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1218            goto cleanup;
1219          }
1220        }
1221
1222        /* launch the kernel */
1223        {
1224          size_t gsize[2];
1225          size_t wsize[2];
1226
1227          gsize[0] = imageColumns;
1228          gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
1229          wsize[0] = 1;
1230          wsize[1] = chunkSize;
1231
1232		  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
1233          if (clStatus != CL_SUCCESS)
1234          {
1235            (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1236            goto cleanup;
1237          }
1238          clEnv->library->clFlush(queue);
1239          RecordProfileData(clEnv,BlurColumnKernel,event);
1240          clEnv->library->clReleaseEvent(event);
1241        }
1242      }
1243    }
1244
1245  }
1246
1247  /* get result */
1248  if (ALIGNED(filteredPixels,CLPixelPacket))
1249  {
1250    length = image->columns * image->rows;
1251    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1252  }
1253  else
1254  {
1255    length = image->columns * image->rows;
1256    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1257  }
1258  if (clStatus != CL_SUCCESS)
1259  {
1260    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
1261    goto cleanup;
1262  }
1263
1264  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1265
1266cleanup:
1267  OpenCLLogException(__FUNCTION__,__LINE__,exception);
1268
1269  image_view=DestroyCacheView(image_view);
1270  if (filteredImage_view != NULL)
1271    filteredImage_view=DestroyCacheView(filteredImage_view);
1272
1273  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
1274  if (tempImageBuffer!=NULL)      clEnv->library->clReleaseMemObject(tempImageBuffer);
1275  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
1276  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
1277  if (blurRowKernel!=NULL)        RelinquishOpenCLKernel(clEnv, blurRowKernel);
1278  if (blurColumnKernel!=NULL)     RelinquishOpenCLKernel(clEnv, blurColumnKernel);
1279  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
1280  if (kernel!=NULL)               DestroyKernelInfo(kernel);
1281  if (outputReady == MagickFalse)
1282  {
1283    if (filteredImage != NULL)
1284    {
1285      DestroyImage(filteredImage);
1286      filteredImage = NULL;
1287    }
1288  }
1289  return filteredImage;
1290}
1291
1292static Image* ComputeBlurImageSingle(const Image* image,
1293  const ChannelType channel,const double radius,const double sigma,
1294  ExceptionInfo *exception)
1295{
1296  return ComputeUnsharpMaskImageSingle(image, channel, radius, sigma, 0.0, 0.0, 1, exception);
1297}
1298
1299MagickExport Image* AccelerateBlurImage(const Image *image,
1300  const ChannelType channel,const double radius,const double sigma,
1301  ExceptionInfo *exception)
1302{
1303  Image
1304    *filteredImage;
1305
1306  assert(image != NULL);
1307  assert(exception != (ExceptionInfo *) NULL);
1308
1309  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1310      (checkAccelerateCondition(image, channel) == MagickFalse))
1311    return NULL;
1312
1313  if (radius < 12.1)
1314	filteredImage=ComputeBlurImageSingle(image, channel, radius, sigma, exception);
1315  else if (splitImage(image) && (image->rows / 2 > radius))
1316    filteredImage=ComputeBlurImageSection(image, channel, radius, sigma, exception);
1317  else
1318    filteredImage=ComputeBlurImage(image, channel, radius, sigma, exception);
1319
1320  return(filteredImage);
1321}
1322
1323/*
1324%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1325%                                                                             %
1326%                                                                             %
1327%                                                                             %
1328%     A c c e l e r a t e C o m p o s i t e I m a g e                         %
1329%                                                                             %
1330%                                                                             %
1331%                                                                             %
1332%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1333*/
1334
1335static MagickBooleanType LaunchCompositeKernel(MagickCLEnv clEnv,
1336  cl_command_queue queue,cl_mem imageBuffer,const unsigned int inputWidth,
1337  const unsigned int inputHeight,const unsigned int matte,
1338  const ChannelType channel,const CompositeOperator compose,
1339  const cl_mem compositeImageBuffer,const unsigned int compositeWidth,
1340  const unsigned int compositeHeight,const float destination_dissolve,
1341  const float source_dissolve,ExceptionInfo *magick_unused(exception))
1342{
1343  cl_int
1344    clStatus;
1345
1346  cl_kernel
1347    compositeKernel;
1348
1349  cl_event
1350    event;
1351
1352  int
1353    k;
1354
1355  size_t
1356    global_work_size[2],
1357    local_work_size[2];
1358
1359  unsigned int
1360    composeOp;
1361
1362  magick_unreferenced(exception);
1363
1364  compositeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
1365    "Composite");
1366
1367  k = 0;
1368  clStatus=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&imageBuffer);
1369  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputWidth);
1370  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&inputHeight);
1371  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(cl_mem),(void*)&compositeImageBuffer);
1372  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeWidth);
1373  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&compositeHeight);
1374  composeOp = (unsigned int)compose;
1375  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&composeOp);
1376  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(ChannelType),(void*)&channel);
1377  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(unsigned int),(void*)&matte);
1378  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&destination_dissolve);
1379  clStatus|=clEnv->library->clSetKernelArg(compositeKernel,k++,sizeof(float),(void*)&source_dissolve);
1380
1381  if (clStatus!=CL_SUCCESS)
1382    return MagickFalse;
1383
1384  local_work_size[0] = 64;
1385  local_work_size[1] = 1;
1386
1387  global_work_size[0] = padGlobalWorkgroupSizeToLocalWorkgroupSize(inputWidth,
1388    (unsigned int) local_work_size[0]);
1389  global_work_size[1] = inputHeight;
1390  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, compositeKernel, 2, NULL,
1391	  global_work_size, local_work_size, 0, NULL, &event);
1392
1393  RecordProfileData(clEnv,CompositeKernel,event);
1394  clEnv->library->clReleaseEvent(event);
1395
1396  RelinquishOpenCLKernel(clEnv, compositeKernel);
1397
1398  return((clStatus==CL_SUCCESS) ? MagickTrue : MagickFalse);
1399}
1400
1401static MagickBooleanType ComputeCompositeImage(Image *image,
1402  const ChannelType channel,const CompositeOperator compose,
1403  const Image *compositeImage,const ssize_t magick_unused(x_offset),
1404  const ssize_t magick_unused(y_offset),const float destination_dissolve,
1405  const float source_dissolve,ExceptionInfo *exception)
1406{
1407  CacheView
1408    *image_view;
1409
1410  cl_command_queue
1411    queue;
1412
1413  cl_context
1414    context;
1415
1416  cl_int
1417    clStatus;
1418
1419  cl_mem_flags
1420    mem_flags;
1421
1422  cl_mem
1423    compositeImageBuffer,
1424    imageBuffer;
1425
1426  const void
1427    *composePixels;
1428
1429  MagickBooleanType
1430    outputReady,
1431    status;
1432
1433  MagickCLEnv
1434    clEnv;
1435
1436  MagickSizeType
1437    length;
1438
1439  void
1440    *inputPixels;
1441
1442  magick_unreferenced(x_offset);
1443  magick_unreferenced(y_offset);
1444
1445  status = MagickFalse;
1446  outputReady = MagickFalse;
1447  composePixels = NULL;
1448  imageBuffer = NULL;
1449  compositeImageBuffer = NULL;
1450
1451  clEnv = GetDefaultOpenCLEnv();
1452  context = GetOpenCLContext(clEnv);
1453  queue = AcquireOpenCLCommandQueue(clEnv);
1454
1455  /* Create and initialize OpenCL buffers. */
1456  image_view=AcquireAuthenticCacheView(image,exception);
1457  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1458  if (inputPixels == (void *) NULL)
1459  {
1460    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
1461      "UnableToReadPixelCache.","`%s'",image->filename);
1462    goto cleanup;
1463  }
1464
1465  /* If the host pointer is aligned to the size of CLPixelPacket,
1466     then use the host buffer directly from the GPU; otherwise,
1467     create a buffer on the GPU and copy the data over */
1468  if (ALIGNED(inputPixels,CLPixelPacket))
1469  {
1470    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1471  }
1472  else
1473  {
1474    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1475  }
1476  /* create a CL buffer from image pixel buffer */
1477  length = image->columns * image->rows;
1478  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
1479    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1480  if (clStatus != CL_SUCCESS)
1481  {
1482    (void) OpenCLThrowMagickException(exception, GetMagickModule(),
1483      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1484    goto cleanup;
1485  }
1486
1487
1488  /* Create and initialize OpenCL buffers. */
1489  composePixels = AcquirePixelCachePixels(compositeImage, &length, exception);
1490  if (composePixels == (void *) NULL)
1491  {
1492    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,
1493      "UnableToReadPixelCache.","`%s'",compositeImage->filename);
1494    goto cleanup;
1495  }
1496
1497  /* If the host pointer is aligned to the size of CLPixelPacket,
1498     then use the host buffer directly from the GPU; otherwise,
1499     create a buffer on the GPU and copy the data over */
1500  if (ALIGNED(composePixels,CLPixelPacket))
1501  {
1502    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1503  }
1504  else
1505  {
1506    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1507  }
1508  /* create a CL buffer from image pixel buffer */
1509  length = compositeImage->columns * compositeImage->rows;
1510  compositeImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
1511    length * sizeof(CLPixelPacket), (void*)composePixels, &clStatus);
1512  if (clStatus != CL_SUCCESS)
1513  {
1514    (void) OpenCLThrowMagickException(exception, GetMagickModule(),
1515      ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1516    goto cleanup;
1517  }
1518
1519  status = LaunchCompositeKernel(clEnv,queue,imageBuffer,
1520           (unsigned int) image->columns,
1521           (unsigned int) image->rows,
1522           (unsigned int) (image->alpha_trait > CopyPixelTrait) ? 1 : 0,
1523           channel, compose, compositeImageBuffer,
1524           (unsigned int) compositeImage->columns,
1525           (unsigned int) compositeImage->rows,
1526           destination_dissolve,source_dissolve,
1527           exception);
1528
1529  if (status==MagickFalse)
1530    goto cleanup;
1531
1532  length = image->columns * image->rows;
1533  if (ALIGNED(inputPixels,CLPixelPacket))
1534  {
1535    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE,
1536      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
1537      NULL, &clStatus);
1538  }
1539  else
1540  {
1541    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0,
1542      length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1543  }
1544  if (clStatus==CL_SUCCESS)
1545    outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1546
1547cleanup:
1548
1549  image_view=DestroyCacheView(image_view);
1550  if (imageBuffer!=NULL)      clEnv->library->clReleaseMemObject(imageBuffer);
1551  if (compositeImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(compositeImageBuffer);
1552  if (queue != NULL)               RelinquishOpenCLCommandQueue(clEnv, queue);
1553
1554  return(outputReady);
1555}
1556
1557MagickExport MagickBooleanType AccelerateCompositeImage(Image *image,
1558  const ChannelType channel,const CompositeOperator compose,
1559  const Image *composite,const ssize_t x_offset,const ssize_t y_offset,
1560  const float destination_dissolve,const float source_dissolve,
1561  ExceptionInfo *exception)
1562{
1563  MagickBooleanType
1564    status;
1565
1566  assert(image != NULL);
1567  assert(exception != (ExceptionInfo *) NULL);
1568
1569  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1570      (checkAccelerateCondition(image, channel) == MagickFalse))
1571    return(MagickFalse);
1572
1573  /* only support zero offset and
1574     images with the size for now */
1575  if (x_offset!=0
1576    || y_offset!=0
1577    || image->columns!=composite->columns
1578    || image->rows!=composite->rows)
1579    return MagickFalse;
1580
1581  switch(compose) {
1582  case ColorDodgeCompositeOp:
1583  case BlendCompositeOp:
1584    break;
1585  default:
1586    // unsupported compose operator, quit
1587    return MagickFalse;
1588  };
1589
1590  status = ComputeCompositeImage(image,channel,compose,composite,
1591    x_offset,y_offset,destination_dissolve,source_dissolve,exception);
1592
1593  return(status);
1594}
1595
1596/*
1597%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1598%                                                                             %
1599%                                                                             %
1600%                                                                             %
1601%     A c c e l e r a t e C o n t r a s t I m a g e                           %
1602%                                                                             %
1603%                                                                             %
1604%                                                                             %
1605%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1606*/
1607
1608static MagickBooleanType ComputeContrastImage(Image *image,
1609  const MagickBooleanType sharpen,ExceptionInfo *exception)
1610{
1611  CacheView
1612    *image_view;
1613
1614  cl_command_queue
1615    queue;
1616
1617  cl_context
1618    context;
1619
1620  cl_int
1621    clStatus;
1622
1623  cl_kernel
1624    filterKernel;
1625
1626  cl_event
1627    event;
1628
1629  cl_mem
1630    imageBuffer;
1631
1632  cl_mem_flags
1633    mem_flags;
1634
1635  MagickBooleanType
1636    outputReady;
1637
1638  MagickCLEnv
1639    clEnv;
1640
1641  MagickSizeType
1642    length;
1643
1644  size_t
1645    global_work_size[2];
1646
1647  unsigned int
1648    i,
1649    uSharpen;
1650
1651  void
1652    *inputPixels;
1653
1654  outputReady = MagickFalse;
1655  clEnv = NULL;
1656  inputPixels = NULL;
1657  context = NULL;
1658  imageBuffer = NULL;
1659  filterKernel = NULL;
1660  queue = NULL;
1661
1662  clEnv = GetDefaultOpenCLEnv();
1663  context = GetOpenCLContext(clEnv);
1664
1665  /* Create and initialize OpenCL buffers. */
1666  image_view=AcquireAuthenticCacheView(image,exception);
1667  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1668  if (inputPixels == (void *) NULL)
1669  {
1670    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1671    goto cleanup;
1672  }
1673
1674  /* If the host pointer is aligned to the size of CLPixelPacket,
1675     then use the host buffer directly from the GPU; otherwise,
1676     create a buffer on the GPU and copy the data over */
1677  if (ALIGNED(inputPixels,CLPixelPacket))
1678  {
1679    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1680  }
1681  else
1682  {
1683    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1684  }
1685  /* create a CL buffer from image pixel buffer */
1686  length = image->columns * image->rows;
1687  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1688  if (clStatus != CL_SUCCESS)
1689  {
1690    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1691    goto cleanup;
1692  }
1693
1694  filterKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Contrast");
1695  if (filterKernel == NULL)
1696  {
1697    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1698    goto cleanup;
1699  }
1700
1701  i = 0;
1702  clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1703
1704  uSharpen = (sharpen == MagickFalse)?0:1;
1705  clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
1706  if (clStatus != CL_SUCCESS)
1707  {
1708    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1709    goto cleanup;
1710  }
1711
1712  global_work_size[0] = image->columns;
1713  global_work_size[1] = image->rows;
1714  /* launch the kernel */
1715  queue = AcquireOpenCLCommandQueue(clEnv);
1716  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1717  if (clStatus != CL_SUCCESS)
1718  {
1719    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1720    goto cleanup;
1721  }
1722  clEnv->library->clFlush(queue);
1723  RecordProfileData(clEnv,ContrastKernel,event);
1724  clEnv->library->clReleaseEvent(event);
1725
1726  if (ALIGNED(inputPixels,CLPixelPacket))
1727  {
1728    length = image->columns * image->rows;
1729    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1730  }
1731  else
1732  {
1733    length = image->columns * image->rows;
1734    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1735  }
1736  if (clStatus != CL_SUCCESS)
1737  {
1738    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
1739    goto cleanup;
1740  }
1741  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1742
1743cleanup:
1744  OpenCLLogException(__FUNCTION__,__LINE__,exception);
1745
1746  image_view=DestroyCacheView(image_view);
1747
1748  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
1749  if (filterKernel!=NULL)                     RelinquishOpenCLKernel(clEnv, filterKernel);
1750  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
1751  return(outputReady);
1752}
1753
1754MagickExport MagickBooleanType AccelerateContrastImage(Image *image,
1755  const MagickBooleanType sharpen,ExceptionInfo *exception)
1756{
1757  MagickBooleanType
1758    status;
1759
1760  assert(image != NULL);
1761  assert(exception != (ExceptionInfo *) NULL);
1762
1763  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
1764      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
1765    return(MagickFalse);
1766
1767  status = ComputeContrastImage(image,sharpen,exception);
1768  return(status);
1769}
1770
1771/*
1772%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1773%                                                                             %
1774%                                                                             %
1775%                                                                             %
1776%     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
1777%                                                                             %
1778%                                                                             %
1779%                                                                             %
1780%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1781*/
1782
1783static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
1784  cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,
1785  Image *image,const ChannelType channel,ExceptionInfo *exception)
1786{
1787  MagickBooleanType
1788    outputReady;
1789
1790  cl_int
1791    clStatus,
1792    colorspace,
1793    method;
1794
1795  cl_kernel
1796    histogramKernel;
1797
1798  cl_event
1799    event;
1800
1801  register ssize_t
1802    i;
1803
1804  size_t
1805    global_work_size[2];
1806
1807  histogramKernel = NULL;
1808
1809  outputReady = MagickFalse;
1810  method = image->intensity;
1811  colorspace = image->colorspace;
1812
1813  /* get the OpenCL kernel */
1814  histogramKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Histogram");
1815  if (histogramKernel == NULL)
1816  {
1817    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
1818    goto cleanup;
1819  }
1820
1821  /* set the kernel arguments */
1822  i = 0;
1823  clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1824  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
1825  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&method);
1826  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_int),&colorspace);
1827  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
1828  if (clStatus != CL_SUCCESS)
1829  {
1830    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
1831    goto cleanup;
1832  }
1833
1834  /* launch the kernel */
1835  global_work_size[0] = image->columns;
1836  global_work_size[1] = image->rows;
1837
1838  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1839
1840  if (clStatus != CL_SUCCESS)
1841  {
1842    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
1843    goto cleanup;
1844  }
1845  clEnv->library->clFlush(queue);
1846  RecordProfileData(clEnv,HistogramKernel,event);
1847  clEnv->library->clReleaseEvent(event);
1848
1849  outputReady = MagickTrue;
1850
1851cleanup:
1852  OpenCLLogException(__FUNCTION__,__LINE__,exception);
1853
1854  if (histogramKernel!=NULL)
1855    RelinquishOpenCLKernel(clEnv, histogramKernel);
1856
1857  return(outputReady);
1858}
1859
1860static MagickBooleanType ComputeContrastStretchImage(Image *image,
1861  const ChannelType channel,const double black_point,const double white_point,
1862  ExceptionInfo *exception)
1863{
1864#define ContrastStretchImageTag  "ContrastStretch/Image"
1865#define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
1866
1867  CacheView
1868    *image_view;
1869
1870  cl_command_queue
1871    queue;
1872
1873  cl_context
1874    context;
1875
1876  cl_int
1877    clStatus;
1878
1879  cl_mem_flags
1880    mem_flags;
1881
1882  cl_mem
1883    histogramBuffer,
1884    imageBuffer,
1885    stretchMapBuffer;
1886
1887  cl_kernel
1888    histogramKernel,
1889    stretchKernel;
1890
1891  cl_event
1892    event;
1893
1894  cl_uint4
1895    *histogram;
1896
1897  double
1898    intensity;
1899
1900  FloatPixelPacket
1901    black,
1902    white;
1903
1904  MagickBooleanType
1905    outputReady,
1906    status;
1907
1908  MagickCLEnv
1909    clEnv;
1910
1911  MagickSizeType
1912    length;
1913
1914  PixelPacket
1915    *stretch_map;
1916
1917  register ssize_t
1918    i;
1919
1920  size_t
1921    global_work_size[2];
1922
1923  void
1924    *hostPtr,
1925    *inputPixels;
1926
1927  histogram=NULL;
1928  stretch_map=NULL;
1929  inputPixels = NULL;
1930  imageBuffer = NULL;
1931  histogramBuffer = NULL;
1932  stretchMapBuffer = NULL;
1933  histogramKernel = NULL;
1934  stretchKernel = NULL;
1935  context = NULL;
1936  queue = NULL;
1937  outputReady = MagickFalse;
1938
1939
1940  assert(image != (Image *) NULL);
1941  assert(image->signature == MagickCoreSignature);
1942  if (image->debug != MagickFalse)
1943    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1944
1945  //exception=(&image->exception);
1946
1947  /*
1948   * initialize opencl env
1949   */
1950  clEnv = GetDefaultOpenCLEnv();
1951  context = GetOpenCLContext(clEnv);
1952  queue = AcquireOpenCLCommandQueue(clEnv);
1953
1954  /*
1955    Allocate and initialize histogram arrays.
1956  */
1957  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1958
1959  if (histogram == (cl_uint4 *) NULL)
1960    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1961
1962  /* reset histogram */
1963  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
1964
1965  /*
1966  if (IsGrayImage(image,exception) != MagickFalse)
1967    (void) SetImageColorspace(image,GRAYColorspace);
1968  */
1969
1970  status=MagickTrue;
1971
1972
1973  /*
1974    Form histogram.
1975  */
1976  /* Create and initialize OpenCL buffers. */
1977  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1978  /* assume this  will get a writable image */
1979  image_view=AcquireAuthenticCacheView(image,exception);
1980  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1981
1982  if (inputPixels == (void *) NULL)
1983  {
1984    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1985    goto cleanup;
1986  }
1987  /* If the host pointer is aligned to the size of CLPixelPacket,
1988     then use the host buffer directly from the GPU; otherwise,
1989     create a buffer on the GPU and copy the data over */
1990  if (ALIGNED(inputPixels,CLPixelPacket))
1991  {
1992    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1993  }
1994  else
1995  {
1996    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1997  }
1998  /* create a CL buffer from image pixel buffer */
1999  length = image->columns * image->rows;
2000  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2001  if (clStatus != CL_SUCCESS)
2002  {
2003    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2004    goto cleanup;
2005  }
2006
2007  /* If the host pointer is aligned to the size of cl_uint,
2008     then use the host buffer directly from the GPU; otherwise,
2009     create a buffer on the GPU and copy the data over */
2010  if (ALIGNED(histogram,cl_uint4))
2011  {
2012    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2013    hostPtr = histogram;
2014  }
2015  else
2016  {
2017    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2018    hostPtr = histogram;
2019  }
2020  /* create a CL buffer for histogram  */
2021  length = (MaxMap+1);
2022  histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2023  if (clStatus != CL_SUCCESS)
2024  {
2025    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2026    goto cleanup;
2027  }
2028
2029  status = LaunchHistogramKernel(clEnv, queue, imageBuffer, histogramBuffer, image, channel, exception);
2030  if (status == MagickFalse)
2031    goto cleanup;
2032
2033  /* read from the kenel output */
2034  if (ALIGNED(histogram,cl_uint4))
2035  {
2036    length = (MaxMap+1);
2037    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2038  }
2039  else
2040  {
2041    length = (MaxMap+1);
2042    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2043  }
2044  if (clStatus != CL_SUCCESS)
2045  {
2046    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
2047    goto cleanup;
2048  }
2049
2050  /* unmap, don't block gpu to use this buffer again.  */
2051  if (ALIGNED(histogram,cl_uint4))
2052  {
2053    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2054    if (clStatus != CL_SUCCESS)
2055    {
2056      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
2057      goto cleanup;
2058    }
2059  }
2060
2061  /* recreate input buffer later, in case image updated */
2062#ifdef RECREATEBUFFER
2063  if (imageBuffer!=NULL)
2064    clEnv->library->clReleaseMemObject(imageBuffer);
2065#endif
2066
2067  /* CPU stuff */
2068  /*
2069     Find the histogram boundaries by locating the black/white levels.
2070  */
2071  black.red=0.0;
2072  white.red=MaxRange(QuantumRange);
2073  if ((channel & RedChannel) != 0)
2074  {
2075    intensity=0.0;
2076    for (i=0; i <= (ssize_t) MaxMap; i++)
2077    {
2078      intensity+=histogram[i].s[2];
2079      if (intensity > black_point)
2080        break;
2081    }
2082    black.red=(MagickRealType) i;
2083    intensity=0.0;
2084    for (i=(ssize_t) MaxMap; i != 0; i--)
2085    {
2086      intensity+=histogram[i].s[2];
2087      if (intensity > ((double) image->columns*image->rows-white_point))
2088        break;
2089    }
2090    white.red=(MagickRealType) i;
2091  }
2092  black.green=0.0;
2093  white.green=MaxRange(QuantumRange);
2094  if ((channel & GreenChannel) != 0)
2095  {
2096    intensity=0.0;
2097    for (i=0; i <= (ssize_t) MaxMap; i++)
2098    {
2099      intensity+=histogram[i].s[2];
2100      if (intensity > black_point)
2101        break;
2102    }
2103    black.green=(MagickRealType) i;
2104    intensity=0.0;
2105    for (i=(ssize_t) MaxMap; i != 0; i--)
2106    {
2107      intensity+=histogram[i].s[2];
2108      if (intensity > ((double) image->columns*image->rows-white_point))
2109        break;
2110    }
2111    white.green=(MagickRealType) i;
2112  }
2113  black.blue=0.0;
2114  white.blue=MaxRange(QuantumRange);
2115  if ((channel & BlueChannel) != 0)
2116  {
2117    intensity=0.0;
2118    for (i=0; i <= (ssize_t) MaxMap; i++)
2119    {
2120      intensity+=histogram[i].s[2];
2121      if (intensity > black_point)
2122        break;
2123    }
2124    black.blue=(MagickRealType) i;
2125    intensity=0.0;
2126    for (i=(ssize_t) MaxMap; i != 0; i--)
2127    {
2128      intensity+=histogram[i].s[2];
2129      if (intensity > ((double) image->columns*image->rows-white_point))
2130        break;
2131    }
2132    white.blue=(MagickRealType) i;
2133  }
2134  black.alpha=0.0;
2135  white.alpha=MaxRange(QuantumRange);
2136  if ((channel & OpacityChannel) != 0)
2137  {
2138    intensity=0.0;
2139    for (i=0; i <= (ssize_t) MaxMap; i++)
2140    {
2141      intensity+=histogram[i].s[2];
2142      if (intensity > black_point)
2143        break;
2144    }
2145    black.alpha=(MagickRealType) i;
2146    intensity=0.0;
2147    for (i=(ssize_t) MaxMap; i != 0; i--)
2148    {
2149      intensity+=histogram[i].s[2];
2150      if (intensity > ((double) image->columns*image->rows-white_point))
2151        break;
2152    }
2153    white.alpha=(MagickRealType) i;
2154  }
2155  /*
2156  black.index=0.0;
2157  white.index=MaxRange(QuantumRange);
2158  if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
2159  {
2160    intensity=0.0;
2161    for (i=0; i <= (ssize_t) MaxMap; i++)
2162    {
2163      intensity+=histogram[i].index;
2164      if (intensity > black_point)
2165        break;
2166    }
2167    black.index=(MagickRealType) i;
2168    intensity=0.0;
2169    for (i=(ssize_t) MaxMap; i != 0; i--)
2170    {
2171      intensity+=histogram[i].index;
2172      if (intensity > ((double) image->columns*image->rows-white_point))
2173        break;
2174    }
2175    white.index=(MagickRealType) i;
2176  }
2177  */
2178
2179
2180  stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
2181    sizeof(*stretch_map));
2182
2183  if (stretch_map == (PixelPacket *) NULL)
2184    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
2185      image->filename);
2186
2187  /*
2188    Stretch the histogram to create the stretched image mapping.
2189  */
2190  (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
2191  for (i=0; i <= (ssize_t) MaxMap; i++)
2192  {
2193    if ((channel & RedChannel) != 0)
2194    {
2195      if (i < (ssize_t) black.red)
2196        stretch_map[i].red=(Quantum) 0;
2197      else
2198        if (i > (ssize_t) white.red)
2199          stretch_map[i].red=QuantumRange;
2200        else
2201          if (black.red != white.red)
2202            stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
2203                  (i-black.red)/(white.red-black.red)));
2204    }
2205    if ((channel & GreenChannel) != 0)
2206    {
2207      if (i < (ssize_t) black.green)
2208        stretch_map[i].green=0;
2209      else
2210        if (i > (ssize_t) white.green)
2211          stretch_map[i].green=QuantumRange;
2212        else
2213          if (black.green != white.green)
2214            stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
2215                  (i-black.green)/(white.green-black.green)));
2216    }
2217    if ((channel & BlueChannel) != 0)
2218    {
2219      if (i < (ssize_t) black.blue)
2220        stretch_map[i].blue=0;
2221      else
2222        if (i > (ssize_t) white.blue)
2223          stretch_map[i].blue= QuantumRange;
2224        else
2225          if (black.blue != white.blue)
2226            stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
2227                  (i-black.blue)/(white.blue-black.blue)));
2228    }
2229    if ((channel & OpacityChannel) != 0)
2230    {
2231      if (i < (ssize_t) black.alpha)
2232        stretch_map[i].alpha=0;
2233      else
2234        if (i > (ssize_t) white.alpha)
2235          stretch_map[i].alpha=QuantumRange;
2236        else
2237          if (black.alpha != white.alpha)
2238            stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
2239                  (i-black.alpha)/(white.alpha-black.alpha)));
2240    }
2241    /*
2242    if (((channel & IndexChannel) != 0) &&
2243        (image->colorspace == CMYKColorspace))
2244    {
2245      if (i < (ssize_t) black.index)
2246        stretch_map[i].index=0;
2247      else
2248        if (i > (ssize_t) white.index)
2249          stretch_map[i].index=QuantumRange;
2250        else
2251          if (black.index != white.index)
2252            stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
2253                  (i-black.index)/(white.index-black.index)));
2254    }
2255    */
2256  }
2257
2258  /*
2259    Stretch the image.
2260  */
2261  if (((channel & OpacityChannel) != 0) || (((channel & IndexChannel) != 0) &&
2262      (image->colorspace == CMYKColorspace)))
2263    image->storage_class=DirectClass;
2264  if (image->storage_class == PseudoClass)
2265  {
2266    /*
2267       Stretch colormap.
2268       */
2269    for (i=0; i < (ssize_t) image->colors; i++)
2270    {
2271      if ((channel & RedChannel) != 0)
2272      {
2273        if (black.red != white.red)
2274          image->colormap[i].red=stretch_map[
2275            ScaleQuantumToMap(image->colormap[i].red)].red;
2276      }
2277      if ((channel & GreenChannel) != 0)
2278      {
2279        if (black.green != white.green)
2280          image->colormap[i].green=stretch_map[
2281            ScaleQuantumToMap(image->colormap[i].green)].green;
2282      }
2283      if ((channel & BlueChannel) != 0)
2284      {
2285        if (black.blue != white.blue)
2286          image->colormap[i].blue=stretch_map[
2287            ScaleQuantumToMap(image->colormap[i].blue)].blue;
2288      }
2289      if ((channel & OpacityChannel) != 0)
2290      {
2291        if (black.alpha != white.alpha)
2292          image->colormap[i].alpha=stretch_map[
2293            ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2294      }
2295    }
2296  }
2297
2298  /*
2299    Stretch image.
2300  */
2301
2302
2303  /* GPU can work on this again, image and equalize map as input
2304    image:        uchar4 (CLPixelPacket)
2305    stretch_map:  uchar4 (PixelPacket)
2306    black, white: float4 (FloatPixelPacket) */
2307
2308#ifdef RECREATEBUFFER
2309  /* If the host pointer is aligned to the size of CLPixelPacket,
2310     then use the host buffer directly from the GPU; otherwise,
2311     create a buffer on the GPU and copy the data over */
2312  if (ALIGNED(inputPixels,CLPixelPacket))
2313  {
2314    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2315  }
2316  else
2317  {
2318    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2319  }
2320  /* create a CL buffer from image pixel buffer */
2321  length = image->columns * image->rows;
2322  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2323  if (clStatus != CL_SUCCESS)
2324  {
2325    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2326    goto cleanup;
2327  }
2328#endif
2329
2330  /* Create and initialize OpenCL buffers. */
2331  if (ALIGNED(stretch_map, PixelPacket))
2332  {
2333    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2334    hostPtr = stretch_map;
2335  }
2336  else
2337  {
2338    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2339    hostPtr = stretch_map;
2340  }
2341  /* create a CL buffer for stretch_map  */
2342  length = (MaxMap+1);
2343  stretchMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2344  if (clStatus != CL_SUCCESS)
2345  {
2346    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2347    goto cleanup;
2348  }
2349
2350  /* get the OpenCL kernel */
2351  stretchKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ContrastStretch");
2352  if (stretchKernel == NULL)
2353  {
2354    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2355    goto cleanup;
2356  }
2357
2358  /* set the kernel arguments */
2359  i = 0;
2360  clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2361  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&channel);
2362  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
2363  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
2364  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
2365  if (clStatus != CL_SUCCESS)
2366  {
2367    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2368    goto cleanup;
2369  }
2370
2371  /* launch the kernel */
2372  global_work_size[0] = image->columns;
2373  global_work_size[1] = image->rows;
2374
2375  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2376
2377  if (clStatus != CL_SUCCESS)
2378  {
2379    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2380    goto cleanup;
2381  }
2382  clEnv->library->clFlush(queue);
2383
2384  RecordProfileData(clEnv,ContrastStretchKernel,event);
2385  clEnv->library->clReleaseEvent(event);
2386
2387  /* read the data back */
2388  if (ALIGNED(inputPixels,CLPixelPacket))
2389  {
2390    length = image->columns * image->rows;
2391    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2392  }
2393  else
2394  {
2395    length = image->columns * image->rows;
2396    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2397  }
2398  if (clStatus != CL_SUCCESS)
2399  {
2400    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
2401    goto cleanup;
2402  }
2403
2404  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2405
2406cleanup:
2407  OpenCLLogException(__FUNCTION__,__LINE__,exception);
2408
2409  image_view=DestroyCacheView(image_view);
2410
2411  if (imageBuffer!=NULL)
2412    clEnv->library->clReleaseMemObject(imageBuffer);
2413
2414  if (stretchMapBuffer!=NULL)
2415    clEnv->library->clReleaseMemObject(stretchMapBuffer);
2416  if (stretch_map!=NULL)
2417    stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
2418
2419
2420  if (histogramBuffer!=NULL)
2421    clEnv->library->clReleaseMemObject(histogramBuffer);
2422  if (histogram!=NULL)
2423    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2424
2425
2426  if (histogramKernel!=NULL)
2427    RelinquishOpenCLKernel(clEnv, histogramKernel);
2428  if (stretchKernel!=NULL)
2429    RelinquishOpenCLKernel(clEnv, stretchKernel);
2430
2431  if (queue != NULL)
2432    RelinquishOpenCLCommandQueue(clEnv, queue);
2433
2434  return(outputReady);
2435}
2436
2437MagickExport MagickBooleanType AccelerateContrastStretchImage(
2438  Image *image,const ChannelType channel,const double black_point,
2439  const double white_point,ExceptionInfo *exception)
2440{
2441  MagickBooleanType
2442    status;
2443
2444  assert(image != NULL);
2445  assert(exception != (ExceptionInfo *) NULL);
2446
2447  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
2448      (checkAccelerateCondition(image, channel) == MagickFalse) ||
2449      (checkHistogramCondition(image, channel) == MagickFalse))
2450    return(MagickFalse);
2451
2452  status=ComputeContrastStretchImage(image,channel, black_point, white_point, exception);
2453  return(status);
2454}
2455
2456/*
2457%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2458%                                                                             %
2459%                                                                             %
2460%                                                                             %
2461%     A c c e l e r a t e C o n v o l v e I m a g e                           %
2462%                                                                             %
2463%                                                                             %
2464%                                                                             %
2465%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2466*/
2467
2468static Image *ComputeConvolveImage(const Image* image,
2469  const ChannelType channel,const KernelInfo *kernel,ExceptionInfo *exception)
2470{
2471  CacheView
2472    *filteredImage_view,
2473    *image_view;
2474
2475  cl_command_queue
2476    queue;
2477
2478  cl_context
2479    context;
2480
2481  cl_kernel
2482    clkernel;
2483
2484  cl_event
2485    event;
2486
2487  cl_int
2488    clStatus;
2489
2490  cl_mem
2491    convolutionKernel,
2492    filteredImageBuffer,
2493    imageBuffer;
2494
2495  cl_mem_flags
2496    mem_flags;
2497
2498  cl_ulong
2499    deviceLocalMemorySize;
2500
2501  const void
2502    *inputPixels;
2503
2504  float
2505    *kernelBufferPtr;
2506
2507  Image
2508    *filteredImage;
2509
2510  MagickBooleanType
2511    outputReady;
2512
2513  MagickCLEnv
2514    clEnv;
2515
2516  MagickSizeType
2517    length;
2518
2519  size_t
2520    global_work_size[3],
2521    localGroupSize[3],
2522    localMemoryRequirement;
2523
2524  unsigned
2525    kernelSize;
2526
2527  unsigned int
2528    filterHeight,
2529    filterWidth,
2530    i,
2531    imageHeight,
2532    imageWidth,
2533    matte;
2534
2535  void
2536    *filteredPixels,
2537    *hostPtr;
2538
2539  /* intialize all CL objects to NULL */
2540  context = NULL;
2541  imageBuffer = NULL;
2542  filteredImageBuffer = NULL;
2543  convolutionKernel = NULL;
2544  clkernel = NULL;
2545  queue = NULL;
2546
2547  filteredImage = NULL;
2548  filteredImage_view = NULL;
2549  outputReady = MagickFalse;
2550
2551  clEnv = GetDefaultOpenCLEnv();
2552  context = GetOpenCLContext(clEnv);
2553
2554  image_view=AcquireVirtualCacheView(image,exception);
2555  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
2556  if (inputPixels == (const void *) NULL)
2557  {
2558    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2559    goto cleanup;
2560  }
2561
2562  /* Create and initialize OpenCL buffers. */
2563
2564  /* If the host pointer is aligned to the size of CLPixelPacket,
2565     then use the host buffer directly from the GPU; otherwise,
2566     create a buffer on the GPU and copy the data over */
2567  if (ALIGNED(inputPixels,CLPixelPacket))
2568  {
2569    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2570  }
2571  else
2572  {
2573    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2574  }
2575  /* create a CL buffer from image pixel buffer */
2576  length = image->columns * image->rows;
2577  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2578  if (clStatus != CL_SUCCESS)
2579  {
2580    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2581    goto cleanup;
2582  }
2583
2584  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2585  assert(filteredImage != NULL);
2586  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2587  {
2588    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2589    goto cleanup;
2590  }
2591  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2592  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2593  if (filteredPixels == (void *) NULL)
2594  {
2595    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2596    goto cleanup;
2597  }
2598
2599  if (ALIGNED(filteredPixels,CLPixelPacket))
2600  {
2601    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2602    hostPtr = filteredPixels;
2603  }
2604  else
2605  {
2606    mem_flags = CL_MEM_WRITE_ONLY;
2607    hostPtr = NULL;
2608  }
2609  /* create a CL buffer from image pixel buffer */
2610  length = image->columns * image->rows;
2611  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2612  if (clStatus != CL_SUCCESS)
2613  {
2614    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2615    goto cleanup;
2616  }
2617
2618  kernelSize = (unsigned int) (kernel->width * kernel->height);
2619  convolutionKernel = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
2620  if (clStatus != CL_SUCCESS)
2621  {
2622    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2623    goto cleanup;
2624  }
2625
2626  queue = AcquireOpenCLCommandQueue(clEnv);
2627
2628  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
2629          , 0, NULL, NULL, &clStatus);
2630  if (clStatus != CL_SUCCESS)
2631  {
2632    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
2633    goto cleanup;
2634  }
2635  for (i = 0; i < kernelSize; i++)
2636  {
2637    kernelBufferPtr[i] = (float) kernel->values[i];
2638  }
2639  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
2640  if (clStatus != CL_SUCCESS)
2641  {
2642    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
2643    goto cleanup;
2644  }
2645  clEnv->library->clFlush(queue);
2646
2647  deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
2648
2649  /* Compute the local memory requirement for a 16x16 workgroup.
2650     If it's larger than 16k, reduce the workgroup size to 8x8 */
2651  localGroupSize[0] = 16;
2652  localGroupSize[1] = 16;
2653  localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
2654    + kernel->width*kernel->height*sizeof(float);
2655
2656  if (localMemoryRequirement > deviceLocalMemorySize)
2657  {
2658    localGroupSize[0] = 8;
2659    localGroupSize[1] = 8;
2660    localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
2661      + kernel->width*kernel->height*sizeof(float);
2662  }
2663  if (localMemoryRequirement <= deviceLocalMemorySize)
2664  {
2665    /* get the OpenCL kernel */
2666    clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ConvolveOptimized");
2667    if (clkernel == NULL)
2668    {
2669      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2670      goto cleanup;
2671    }
2672
2673    /* set the kernel arguments */
2674    i = 0;
2675    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2676    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2677    imageWidth = (unsigned int) image->columns;
2678    imageHeight = (unsigned int) image->rows;
2679    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
2680    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
2681    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
2682    filterWidth = (unsigned int) kernel->width;
2683    filterHeight = (unsigned int) kernel->height;
2684    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
2685    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
2686    matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2687    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
2688    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
2689    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
2690    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
2691    if (clStatus != CL_SUCCESS)
2692    {
2693      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2694      goto cleanup;
2695    }
2696
2697    /* pad the global size to a multiple of the local work size dimension */
2698    global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
2699    global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
2700
2701    /* launch the kernel */
2702	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
2703    if (clStatus != CL_SUCCESS)
2704    {
2705      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2706      goto cleanup;
2707    }
2708    RecordProfileData(clEnv,ConvolveKernel,event);
2709    clEnv->library->clReleaseEvent(event);
2710  }
2711  else
2712  {
2713    /* get the OpenCL kernel */
2714    clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Convolve");
2715    if (clkernel == NULL)
2716    {
2717      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
2718      goto cleanup;
2719    }
2720
2721    /* set the kernel arguments */
2722    i = 0;
2723    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2724    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
2725    imageWidth = (unsigned int) image->columns;
2726    imageHeight = (unsigned int) image->rows;
2727    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
2728    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
2729    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
2730    filterWidth = (unsigned int) kernel->width;
2731    filterHeight = (unsigned int) kernel->height;
2732    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
2733    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
2734    matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2735    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
2736    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
2737    if (clStatus != CL_SUCCESS)
2738    {
2739      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
2740      goto cleanup;
2741    }
2742
2743    localGroupSize[0] = 8;
2744    localGroupSize[1] = 8;
2745    global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
2746    global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
2747	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
2748
2749    if (clStatus != CL_SUCCESS)
2750    {
2751      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
2752      goto cleanup;
2753    }
2754  }
2755  clEnv->library->clFlush(queue);
2756  RecordProfileData(clEnv,ConvolveKernel,event);
2757  clEnv->library->clReleaseEvent(event);
2758
2759  if (ALIGNED(filteredPixels,CLPixelPacket))
2760  {
2761    length = image->columns * image->rows;
2762    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2763  }
2764  else
2765  {
2766    length = image->columns * image->rows;
2767    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2768  }
2769  if (clStatus != CL_SUCCESS)
2770  {
2771    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
2772    goto cleanup;
2773  }
2774
2775  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2776
2777cleanup:
2778  OpenCLLogException(__FUNCTION__,__LINE__,exception);
2779
2780  image_view=DestroyCacheView(image_view);
2781  if (filteredImage_view != NULL)
2782    filteredImage_view=DestroyCacheView(filteredImage_view);
2783
2784  if (imageBuffer != NULL)
2785    clEnv->library->clReleaseMemObject(imageBuffer);
2786
2787  if (filteredImageBuffer != NULL)
2788    clEnv->library->clReleaseMemObject(filteredImageBuffer);
2789
2790  if (convolutionKernel != NULL)
2791    clEnv->library->clReleaseMemObject(convolutionKernel);
2792
2793  if (clkernel != NULL)
2794    RelinquishOpenCLKernel(clEnv, clkernel);
2795
2796  if (queue != NULL)
2797    RelinquishOpenCLCommandQueue(clEnv, queue);
2798
2799  if (outputReady == MagickFalse)
2800  {
2801    if (filteredImage != NULL)
2802    {
2803      DestroyImage(filteredImage);
2804      filteredImage = NULL;
2805    }
2806  }
2807
2808  return(filteredImage);
2809}
2810
2811MagickExport Image *AccelerateConvolveImage(const Image *image,
2812  const ChannelType channel,const KernelInfo *kernel,ExceptionInfo *exception)
2813{
2814  Image
2815    *filteredImage;
2816
2817  assert(image != NULL);
2818  assert(kernel != (KernelInfo *) NULL);
2819  assert(exception != (ExceptionInfo *) NULL);
2820
2821  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
2822      (checkAccelerateCondition(image, channel) == MagickFalse))
2823    return NULL;
2824
2825  filteredImage=ComputeConvolveImage(image, channel, kernel, exception);
2826  return(filteredImage);
2827}
2828
2829/*
2830%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2831%                                                                             %
2832%                                                                             %
2833%                                                                             %
2834%     A c c e l e r a t e D e s p e c k l e I m a g e                         %
2835%                                                                             %
2836%                                                                             %
2837%                                                                             %
2838%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2839*/
2840
2841static Image *ComputeDespeckleImage(const Image *image,
2842  ExceptionInfo*exception)
2843{
2844  static const int
2845    X[4] = {0, 1, 1,-1},
2846    Y[4] = {1, 0, 1, 1};
2847
2848  CacheView
2849    *filteredImage_view,
2850    *image_view;
2851
2852  cl_command_queue
2853    queue;
2854
2855  cl_context
2856    context;
2857
2858  cl_int
2859    clStatus;
2860
2861  cl_kernel
2862    hullPass1,
2863    hullPass2;
2864
2865  cl_event
2866    event;
2867
2868  cl_mem_flags
2869    mem_flags;
2870
2871  cl_mem
2872    filteredImageBuffer,
2873    imageBuffer,
2874    tempImageBuffer[2];
2875
2876  const void
2877    *inputPixels;
2878
2879  Image
2880    *filteredImage;
2881
2882  int
2883    k,
2884    matte;
2885
2886  MagickBooleanType
2887    outputReady;
2888
2889  MagickCLEnv
2890    clEnv;
2891
2892  MagickSizeType
2893    length;
2894
2895  size_t
2896    global_work_size[2];
2897
2898  unsigned int
2899    imageHeight,
2900    imageWidth;
2901
2902  void
2903    *filteredPixels,
2904    *hostPtr;
2905
2906  outputReady = MagickFalse;
2907  clEnv = NULL;
2908  inputPixels = NULL;
2909  filteredImage = NULL;
2910  filteredImage_view = NULL;
2911  filteredPixels = NULL;
2912  context = NULL;
2913  imageBuffer = NULL;
2914  filteredImageBuffer = NULL;
2915  hullPass1 = NULL;
2916  hullPass2 = NULL;
2917  queue = NULL;
2918  tempImageBuffer[0] = tempImageBuffer[1] = NULL;
2919  clEnv = GetDefaultOpenCLEnv();
2920  context = GetOpenCLContext(clEnv);
2921  queue = AcquireOpenCLCommandQueue(clEnv);
2922
2923  image_view=AcquireVirtualCacheView(image,exception);
2924  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
2925  if (inputPixels == (void *) NULL)
2926  {
2927    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2928    goto cleanup;
2929  }
2930
2931  if (ALIGNED(inputPixels,CLPixelPacket))
2932  {
2933    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2934  }
2935  else
2936  {
2937    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2938  }
2939  /* create a CL buffer from image pixel buffer */
2940  length = image->columns * image->rows;
2941  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2942  if (clStatus != CL_SUCCESS)
2943  {
2944    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2945    goto cleanup;
2946  }
2947
2948  mem_flags = CL_MEM_READ_WRITE;
2949  length = image->columns * image->rows;
2950  for (k = 0; k < 2; k++)
2951  {
2952    tempImageBuffer[k] = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
2953    if (clStatus != CL_SUCCESS)
2954    {
2955      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2956      goto cleanup;
2957    }
2958  }
2959
2960  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2961  assert(filteredImage != NULL);
2962  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2963  {
2964    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
2965    goto cleanup;
2966  }
2967  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2968  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2969  if (filteredPixels == (void *) NULL)
2970  {
2971    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2972    goto cleanup;
2973  }
2974
2975  if (ALIGNED(filteredPixels,CLPixelPacket))
2976  {
2977    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2978    hostPtr = filteredPixels;
2979  }
2980  else
2981  {
2982    mem_flags = CL_MEM_WRITE_ONLY;
2983    hostPtr = NULL;
2984  }
2985  /* create a CL buffer from image pixel buffer */
2986  length = image->columns * image->rows;
2987  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2988  if (clStatus != CL_SUCCESS)
2989  {
2990    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2991    goto cleanup;
2992  }
2993
2994  hullPass1 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass1");
2995  hullPass2 = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "HullPass2");
2996
2997  clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2998  clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2999  imageWidth = (unsigned int) image->columns;
3000  clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
3001  imageHeight = (unsigned int) image->rows;
3002  clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
3003  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3004  clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
3005  if (clStatus != CL_SUCCESS)
3006  {
3007    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3008    goto cleanup;
3009  }
3010
3011  clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
3012  clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
3013  imageWidth = (unsigned int) image->columns;
3014  clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
3015  imageHeight = (unsigned int) image->rows;
3016  clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
3017  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3018  clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
3019  if (clStatus != CL_SUCCESS)
3020  {
3021    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3022    goto cleanup;
3023  }
3024
3025
3026  global_work_size[0] = image->columns;
3027  global_work_size[1] = image->rows;
3028
3029
3030  for (k = 0; k < 4; k++)
3031  {
3032    cl_int2 offset;
3033    int polarity;
3034
3035
3036    offset.s[0] = X[k];
3037    offset.s[1] = Y[k];
3038    polarity = 1;
3039    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
3040    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
3041    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
3042    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
3043    if (clStatus != CL_SUCCESS)
3044    {
3045      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3046      goto cleanup;
3047    }
3048    /* launch the kernel */
3049	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3050    if (clStatus != CL_SUCCESS)
3051    {
3052      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3053      goto cleanup;
3054    }
3055    RecordProfileData(clEnv,HullPass1Kernel,event);
3056    clEnv->library->clReleaseEvent(event);
3057
3058    /* launch the kernel */
3059	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3060    if (clStatus != CL_SUCCESS)
3061    {
3062      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3063      goto cleanup;
3064    }
3065    RecordProfileData(clEnv,HullPass2Kernel,event);
3066    clEnv->library->clReleaseEvent(event);
3067
3068    if (k == 0)
3069      clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
3070    offset.s[0] = -X[k];
3071    offset.s[1] = -Y[k];
3072    polarity = 1;
3073    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
3074    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
3075    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
3076    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
3077    if (clStatus != CL_SUCCESS)
3078    {
3079      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3080      goto cleanup;
3081    }
3082    /* launch the kernel */
3083	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3084    if (clStatus != CL_SUCCESS)
3085    {
3086      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3087      goto cleanup;
3088    }
3089    RecordProfileData(clEnv,HullPass1Kernel,event);
3090    clEnv->library->clReleaseEvent(event);
3091
3092    /* launch the kernel */
3093	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3094    if (clStatus != CL_SUCCESS)
3095    {
3096      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3097      goto cleanup;
3098    }
3099    RecordProfileData(clEnv,HullPass2Kernel,event);
3100    clEnv->library->clReleaseEvent(event);
3101
3102    offset.s[0] = -X[k];
3103    offset.s[1] = -Y[k];
3104    polarity = -1;
3105    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
3106    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
3107    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
3108    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
3109    if (clStatus != CL_SUCCESS)
3110    {
3111      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3112      goto cleanup;
3113    }
3114    /* launch the kernel */
3115	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3116    if (clStatus != CL_SUCCESS)
3117    {
3118      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3119      goto cleanup;
3120    }
3121    RecordProfileData(clEnv,HullPass1Kernel,event);
3122    clEnv->library->clReleaseEvent(event);
3123
3124    /* launch the kernel */
3125	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3126    if (clStatus != CL_SUCCESS)
3127    {
3128      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3129      goto cleanup;
3130    }
3131    RecordProfileData(clEnv,HullPass2Kernel,event);
3132    clEnv->library->clReleaseEvent(event);
3133
3134    offset.s[0] = X[k];
3135    offset.s[1] = Y[k];
3136    polarity = -1;
3137    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
3138    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
3139    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
3140    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
3141
3142    if (k == 3)
3143      clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
3144
3145    if (clStatus != CL_SUCCESS)
3146    {
3147      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3148      goto cleanup;
3149    }
3150    /* launch the kernel */
3151	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3152    if (clStatus != CL_SUCCESS)
3153    {
3154      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3155      goto cleanup;
3156    }
3157    RecordProfileData(clEnv,HullPass1Kernel,event);
3158    clEnv->library->clReleaseEvent(event);
3159
3160    /* launch the kernel */
3161	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3162    if (clStatus != CL_SUCCESS)
3163    {
3164      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3165      goto cleanup;
3166    }
3167    RecordProfileData(clEnv,HullPass2Kernel,event);
3168    clEnv->library->clReleaseEvent(event);
3169  }
3170
3171  if (ALIGNED(filteredPixels,CLPixelPacket))
3172  {
3173    length = image->columns * image->rows;
3174    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3175  }
3176  else
3177  {
3178    length = image->columns * image->rows;
3179    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3180  }
3181  if (clStatus != CL_SUCCESS)
3182  {
3183    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
3184    goto cleanup;
3185  }
3186
3187  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3188
3189cleanup:
3190  OpenCLLogException(__FUNCTION__,__LINE__,exception);
3191
3192  image_view=DestroyCacheView(image_view);
3193  if (filteredImage_view != NULL)
3194    filteredImage_view=DestroyCacheView(filteredImage_view);
3195
3196  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
3197  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
3198  for (k = 0; k < 2; k++)
3199  {
3200    if (tempImageBuffer[k]!=NULL)	      clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
3201  }
3202  if (filteredImageBuffer!=NULL)	      clEnv->library->clReleaseMemObject(filteredImageBuffer);
3203  if (hullPass1!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass1);
3204  if (hullPass2!=NULL)			      RelinquishOpenCLKernel(clEnv, hullPass2);
3205  if (outputReady == MagickFalse && filteredImage != NULL)
3206    filteredImage=DestroyImage(filteredImage);
3207  return(filteredImage);
3208}
3209
3210MagickExport Image *AccelerateDespeckleImage(const Image* image,
3211  ExceptionInfo* exception)
3212{
3213  Image
3214    *filteredImage;
3215
3216  assert(image != NULL);
3217  assert(exception != (ExceptionInfo *) NULL);
3218
3219  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3220      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
3221    return NULL;
3222
3223  filteredImage=ComputeDespeckleImage(image,exception);
3224  return(filteredImage);
3225}
3226
3227/*
3228%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3229%                                                                             %
3230%                                                                             %
3231%                                                                             %
3232%     A c c e l e r a t e E q u a l i z e I m a g e                           %
3233%                                                                             %
3234%                                                                             %
3235%                                                                             %
3236%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3237*/
3238
3239static MagickBooleanType ComputeEqualizeImage(Image *image,
3240  const ChannelType channel,ExceptionInfo *exception)
3241{
3242#define EqualizeImageTag  "Equalize/Image"
3243
3244  CacheView
3245    *image_view;
3246
3247  cl_command_queue
3248    queue;
3249
3250  cl_context
3251    context;
3252
3253  cl_int
3254    clStatus;
3255
3256  cl_mem_flags
3257    mem_flags;
3258
3259  cl_mem
3260    equalizeMapBuffer,
3261    histogramBuffer,
3262    imageBuffer;
3263
3264  cl_kernel
3265    equalizeKernel,
3266    histogramKernel;
3267
3268  cl_event
3269    event;
3270
3271  cl_uint4
3272    *histogram;
3273
3274  FloatPixelPacket
3275    white,
3276    black,
3277    intensity,
3278    *map;
3279
3280  MagickBooleanType
3281    outputReady,
3282    status;
3283
3284  MagickCLEnv
3285    clEnv;
3286
3287  MagickSizeType
3288    length;
3289
3290  PixelPacket
3291    *equalize_map;
3292
3293  register ssize_t
3294    i;
3295
3296  size_t
3297    global_work_size[2];
3298
3299  void
3300    *hostPtr,
3301    *inputPixels;
3302
3303  map=NULL;
3304  histogram=NULL;
3305  equalize_map=NULL;
3306  inputPixels = NULL;
3307  imageBuffer = NULL;
3308  histogramBuffer = NULL;
3309  equalizeMapBuffer = NULL;
3310  histogramKernel = NULL;
3311  equalizeKernel = NULL;
3312  context = NULL;
3313  queue = NULL;
3314  outputReady = MagickFalse;
3315
3316  assert(image != (Image *) NULL);
3317  assert(image->signature == MagickCoreSignature);
3318  if (image->debug != MagickFalse)
3319    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3320
3321  /*
3322   * initialize opencl env
3323   */
3324  clEnv = GetDefaultOpenCLEnv();
3325  context = GetOpenCLContext(clEnv);
3326  queue = AcquireOpenCLCommandQueue(clEnv);
3327
3328  /*
3329    Allocate and initialize histogram arrays.
3330  */
3331  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
3332  if (histogram == (cl_uint4 *) NULL)
3333      ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3334
3335  /* reset histogram */
3336  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
3337
3338  /* Create and initialize OpenCL buffers. */
3339  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
3340  /* assume this  will get a writable image */
3341  image_view=AcquireAuthenticCacheView(image,exception);
3342  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3343
3344  if (inputPixels == (void *) NULL)
3345  {
3346    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3347    goto cleanup;
3348  }
3349  /* If the host pointer is aligned to the size of CLPixelPacket,
3350     then use the host buffer directly from the GPU; otherwise,
3351     create a buffer on the GPU and copy the data over */
3352  if (ALIGNED(inputPixels,CLPixelPacket))
3353  {
3354    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3355  }
3356  else
3357  {
3358    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3359  }
3360  /* create a CL buffer from image pixel buffer */
3361  length = image->columns * image->rows;
3362  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3363  if (clStatus != CL_SUCCESS)
3364  {
3365    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3366    goto cleanup;
3367  }
3368
3369  /* If the host pointer is aligned to the size of cl_uint,
3370     then use the host buffer directly from the GPU; otherwise,
3371     create a buffer on the GPU and copy the data over */
3372  if (ALIGNED(histogram,cl_uint4))
3373  {
3374    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3375    hostPtr = histogram;
3376  }
3377  else
3378  {
3379    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3380    hostPtr = histogram;
3381  }
3382  /* create a CL buffer for histogram  */
3383  length = (MaxMap+1);
3384  histogramBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
3385  if (clStatus != CL_SUCCESS)
3386  {
3387    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3388    goto cleanup;
3389  }
3390
3391  status = LaunchHistogramKernel(clEnv, queue, imageBuffer, histogramBuffer, image, channel, exception);
3392  if (status == MagickFalse)
3393    goto cleanup;
3394
3395  /* read from the kenel output */
3396  if (ALIGNED(histogram,cl_uint4))
3397  {
3398    length = (MaxMap+1);
3399    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
3400  }
3401  else
3402  {
3403    length = (MaxMap+1);
3404    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
3405  }
3406  if (clStatus != CL_SUCCESS)
3407  {
3408    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
3409    goto cleanup;
3410  }
3411
3412  /* unmap, don't block gpu to use this buffer again.  */
3413  if (ALIGNED(histogram,cl_uint4))
3414  {
3415    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
3416    if (clStatus != CL_SUCCESS)
3417    {
3418      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
3419      goto cleanup;
3420    }
3421  }
3422
3423  /* recreate input buffer later, in case image updated */
3424#ifdef RECREATEBUFFER
3425  if (imageBuffer!=NULL)
3426    clEnv->library->clReleaseMemObject(imageBuffer);
3427#endif
3428
3429  /* CPU stuff */
3430  equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
3431  if (equalize_map == (PixelPacket *) NULL)
3432    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3433
3434  map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
3435  if (map == (FloatPixelPacket *) NULL)
3436    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
3437
3438  /*
3439    Integrate the histogram to get the equalization map.
3440  */
3441  (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
3442  for (i=0; i <= (ssize_t) MaxMap; i++)
3443  {
3444    if ((channel & SyncChannels) != 0)
3445    {
3446      intensity.red+=histogram[i].s[2];
3447      map[i]=intensity;
3448      continue;
3449    }
3450    if ((channel & RedChannel) != 0)
3451      intensity.red+=histogram[i].s[2];
3452    if ((channel & GreenChannel) != 0)
3453      intensity.green+=histogram[i].s[1];
3454    if ((channel & BlueChannel) != 0)
3455      intensity.blue+=histogram[i].s[0];
3456    if ((channel & OpacityChannel) != 0)
3457      intensity.alpha+=histogram[i].s[3];
3458    /*
3459    if (((channel & IndexChannel) != 0) &&
3460        (image->colorspace == CMYKColorspace))
3461    {
3462      intensity.index+=histogram[i].index;
3463    }
3464    */
3465    map[i]=intensity;
3466  }
3467  black=map[0];
3468  white=map[(int) MaxMap];
3469  (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
3470  for (i=0; i <= (ssize_t) MaxMap; i++)
3471  {
3472    if ((channel & SyncChannels) != 0)
3473    {
3474      if (white.red != black.red)
3475        equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3476                (map[i].red-black.red))/(white.red-black.red)));
3477      continue;
3478    }
3479    if (((channel & RedChannel) != 0) && (white.red != black.red))
3480      equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3481              (map[i].red-black.red))/(white.red-black.red)));
3482    if (((channel & GreenChannel) != 0) && (white.green != black.green))
3483      equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3484              (map[i].green-black.green))/(white.green-black.green)));
3485    if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3486      equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3487              (map[i].blue-black.blue))/(white.blue-black.blue)));
3488    if (((channel & OpacityChannel) != 0) && (white.alpha != black.alpha))
3489      equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3490              (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
3491    /*
3492    if ((((channel & IndexChannel) != 0) &&
3493          (image->colorspace == CMYKColorspace)) &&
3494        (white.index != black.index))
3495      equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
3496              (map[i].index-black.index))/(white.index-black.index)));
3497    */
3498  }
3499
3500  if (image->storage_class == PseudoClass)
3501  {
3502    /*
3503       Equalize colormap.
3504       */
3505    for (i=0; i < (ssize_t) image->colors; i++)
3506    {
3507      if ((channel & SyncChannels) != 0)
3508      {
3509        if (white.red != black.red)
3510        {
3511          image->colormap[i].red=equalize_map[
3512            ScaleQuantumToMap(image->colormap[i].red)].red;
3513          image->colormap[i].green=equalize_map[
3514            ScaleQuantumToMap(image->colormap[i].green)].red;
3515          image->colormap[i].blue=equalize_map[
3516            ScaleQuantumToMap(image->colormap[i].blue)].red;
3517          image->colormap[i].alpha=equalize_map[
3518            ScaleQuantumToMap(image->colormap[i].alpha)].red;
3519        }
3520        continue;
3521      }
3522      if (((channel & RedChannel) != 0) && (white.red != black.red))
3523        image->colormap[i].red=equalize_map[
3524          ScaleQuantumToMap(image->colormap[i].red)].red;
3525      if (((channel & GreenChannel) != 0) && (white.green != black.green))
3526        image->colormap[i].green=equalize_map[
3527          ScaleQuantumToMap(image->colormap[i].green)].green;
3528      if (((channel & BlueChannel) != 0) && (white.blue != black.blue))
3529        image->colormap[i].blue=equalize_map[
3530          ScaleQuantumToMap(image->colormap[i].blue)].blue;
3531      if (((channel & OpacityChannel) != 0) &&
3532          (white.alpha != black.alpha))
3533        image->colormap[i].alpha=equalize_map[
3534          ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
3535    }
3536  }
3537
3538  /*
3539    Equalize image.
3540  */
3541
3542  /* GPU can work on this again, image and equalize map as input
3543    image:        uchar4 (CLPixelPacket)
3544    equalize_map: uchar4 (PixelPacket)
3545    black, white: float4 (FloatPixelPacket) */
3546
3547#ifdef RECREATEBUFFER
3548  /* If the host pointer is aligned to the size of CLPixelPacket,
3549     then use the host buffer directly from the GPU; otherwise,
3550     create a buffer on the GPU and copy the data over */
3551  if (ALIGNED(inputPixels,CLPixelPacket))
3552  {
3553    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3554  }
3555  else
3556  {
3557    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3558  }
3559  /* create a CL buffer from image pixel buffer */
3560  length = image->columns * image->rows;
3561  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3562  if (clStatus != CL_SUCCESS)
3563  {
3564    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3565    goto cleanup;
3566  }
3567#endif
3568
3569  /* Create and initialize OpenCL buffers. */
3570  if (ALIGNED(equalize_map, PixelPacket))
3571  {
3572    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3573    hostPtr = equalize_map;
3574  }
3575  else
3576  {
3577    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3578    hostPtr = equalize_map;
3579  }
3580  /* create a CL buffer for eqaulize_map  */
3581  length = (MaxMap+1);
3582  equalizeMapBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
3583  if (clStatus != CL_SUCCESS)
3584  {
3585    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3586    goto cleanup;
3587  }
3588
3589  /* get the OpenCL kernel */
3590  equalizeKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Equalize");
3591  if (equalizeKernel == NULL)
3592  {
3593    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3594    goto cleanup;
3595  }
3596
3597  /* set the kernel arguments */
3598  i = 0;
3599  clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3600  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&channel);
3601  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
3602  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
3603  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
3604  if (clStatus != CL_SUCCESS)
3605  {
3606    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3607    goto cleanup;
3608  }
3609
3610  /* launch the kernel */
3611  global_work_size[0] = image->columns;
3612  global_work_size[1] = image->rows;
3613
3614  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3615
3616  if (clStatus != CL_SUCCESS)
3617  {
3618    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3619    goto cleanup;
3620  }
3621  clEnv->library->clFlush(queue);
3622  RecordProfileData(clEnv,EqualizeKernel,event);
3623  clEnv->library->clReleaseEvent(event);
3624
3625  /* read the data back */
3626  if (ALIGNED(inputPixels,CLPixelPacket))
3627  {
3628    length = image->columns * image->rows;
3629    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3630  }
3631  else
3632  {
3633    length = image->columns * image->rows;
3634    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3635  }
3636  if (clStatus != CL_SUCCESS)
3637  {
3638    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
3639    goto cleanup;
3640  }
3641
3642  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3643
3644cleanup:
3645  OpenCLLogException(__FUNCTION__,__LINE__,exception);
3646
3647  image_view=DestroyCacheView(image_view);
3648
3649  if (imageBuffer!=NULL)
3650    clEnv->library->clReleaseMemObject(imageBuffer);
3651
3652  if (map!=NULL)
3653    map=(FloatPixelPacket *) RelinquishMagickMemory(map);
3654
3655  if (equalizeMapBuffer!=NULL)
3656    clEnv->library->clReleaseMemObject(equalizeMapBuffer);
3657  if (equalize_map!=NULL)
3658    equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
3659
3660  if (histogramBuffer!=NULL)
3661    clEnv->library->clReleaseMemObject(histogramBuffer);
3662  if (histogram!=NULL)
3663    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
3664
3665  if (histogramKernel!=NULL)
3666    RelinquishOpenCLKernel(clEnv, histogramKernel);
3667  if (equalizeKernel!=NULL)
3668    RelinquishOpenCLKernel(clEnv, equalizeKernel);
3669
3670  if (queue != NULL)
3671    RelinquishOpenCLCommandQueue(clEnv, queue);
3672
3673  return(outputReady);
3674}
3675
3676MagickExport MagickBooleanType AccelerateEqualizeImage(Image *image,
3677  const ChannelType channel,ExceptionInfo *exception)
3678{
3679  MagickBooleanType
3680    status;
3681
3682  assert(image != NULL);
3683  assert(exception != (ExceptionInfo *) NULL);
3684
3685  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3686      (checkAccelerateCondition(image, channel) == MagickFalse) ||
3687      (checkHistogramCondition(image, channel) == MagickFalse))
3688    return(MagickFalse);
3689
3690  status=ComputeEqualizeImage(image,channel,exception);
3691  return(status);
3692}
3693
3694/*
3695%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3696%                                                                             %
3697%                                                                             %
3698%                                                                             %
3699%     A c c e l e r a t e F u n c t i o n I m a g e                           %
3700%                                                                             %
3701%                                                                             %
3702%                                                                             %
3703%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3704*/
3705
3706static MagickBooleanType ComputeFunctionImage(Image *image,
3707  const ChannelType channel,const MagickFunction function,
3708  const size_t number_parameters,const double *parameters,
3709  ExceptionInfo *exception)
3710{
3711  CacheView
3712    *image_view;
3713
3714  cl_command_queue
3715    queue;
3716
3717  cl_context
3718    context;
3719
3720  cl_int
3721    clStatus;
3722
3723  cl_kernel
3724    clkernel;
3725
3726  cl_event
3727    event;
3728
3729  cl_mem
3730    imageBuffer,
3731    parametersBuffer;
3732
3733  cl_mem_flags
3734    mem_flags;
3735
3736  float
3737    *parametersBufferPtr;
3738
3739  MagickBooleanType
3740    status;
3741
3742  MagickCLEnv
3743    clEnv;
3744
3745  MagickSizeType
3746    length;
3747
3748  size_t
3749    globalWorkSize[2];
3750
3751  unsigned int
3752    i;
3753
3754  void
3755    *pixels;
3756
3757  status = MagickFalse;
3758
3759  context = NULL;
3760  clkernel = NULL;
3761  queue = NULL;
3762  imageBuffer = NULL;
3763  parametersBuffer = NULL;
3764
3765  clEnv = GetDefaultOpenCLEnv();
3766  context = GetOpenCLContext(clEnv);
3767
3768  image_view=AcquireAuthenticCacheView(image,exception);
3769  pixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3770  if (pixels == (void *) NULL)
3771  {
3772    (void) OpenCLThrowMagickException(exception, GetMagickModule(), CacheWarning,
3773      "GetPixelCachePixels failed.",
3774      "'%s'", image->filename);
3775    goto cleanup;
3776  }
3777
3778
3779  if (ALIGNED(pixels,CLPixelPacket))
3780  {
3781    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3782  }
3783  else
3784  {
3785    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3786  }
3787  /* create a CL buffer from image pixel buffer */
3788  length = image->columns * image->rows;
3789  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)pixels, &clStatus);
3790  if (clStatus != CL_SUCCESS)
3791  {
3792    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3793    goto cleanup;
3794  }
3795
3796  parametersBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, number_parameters * sizeof(float), NULL, &clStatus);
3797  if (clStatus != CL_SUCCESS)
3798  {
3799    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3800    goto cleanup;
3801  }
3802
3803  queue = AcquireOpenCLCommandQueue(clEnv);
3804
3805  parametersBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, parametersBuffer, CL_TRUE, CL_MAP_WRITE, 0, number_parameters * sizeof(float)
3806                , 0, NULL, NULL, &clStatus);
3807  if (clStatus != CL_SUCCESS)
3808  {
3809    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
3810    goto cleanup;
3811  }
3812  for (i = 0; i < number_parameters; i++)
3813  {
3814    parametersBufferPtr[i] = (float)parameters[i];
3815  }
3816  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, parametersBuffer, parametersBufferPtr, 0, NULL, NULL);
3817  if (clStatus != CL_SUCCESS)
3818  {
3819    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
3820    goto cleanup;
3821  }
3822  clEnv->library->clFlush(queue);
3823
3824  clkernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ComputeFunction");
3825  if (clkernel == NULL)
3826  {
3827    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
3828    goto cleanup;
3829  }
3830
3831  /* set the kernel arguments */
3832  i = 0;
3833  clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3834  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&channel);
3835  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(MagickFunction),(void *)&function);
3836  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&number_parameters);
3837  clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
3838  if (clStatus != CL_SUCCESS)
3839  {
3840    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
3841    goto cleanup;
3842  }
3843
3844  globalWorkSize[0] = image->columns;
3845  globalWorkSize[1] = image->rows;
3846  /* launch the kernel */
3847  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, globalWorkSize, NULL, 0, NULL, &event);
3848  if (clStatus != CL_SUCCESS)
3849  {
3850    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
3851    goto cleanup;
3852  }
3853  clEnv->library->clFlush(queue);
3854  RecordProfileData(clEnv,ComputeFunctionKernel,event);
3855  clEnv->library->clReleaseEvent(event);
3856
3857  if (ALIGNED(pixels,CLPixelPacket))
3858  {
3859    length = image->columns * image->rows;
3860    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3861  }
3862  else
3863  {
3864    length = image->columns * image->rows;
3865    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), pixels, 0, NULL, NULL);
3866  }
3867  if (clStatus != CL_SUCCESS)
3868  {
3869    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
3870    goto cleanup;
3871  }
3872  status=SyncCacheViewAuthenticPixels(image_view,exception);
3873
3874cleanup:
3875  OpenCLLogException(__FUNCTION__,__LINE__,exception);
3876
3877  image_view=DestroyCacheView(image_view);
3878
3879  if (clkernel != NULL) RelinquishOpenCLKernel(clEnv, clkernel);
3880  if (queue != NULL) RelinquishOpenCLCommandQueue(clEnv, queue);
3881  if (imageBuffer != NULL) clEnv->library->clReleaseMemObject(imageBuffer);
3882  if (parametersBuffer != NULL) clEnv->library->clReleaseMemObject(parametersBuffer);
3883
3884  return(status);
3885}
3886
3887MagickExport MagickBooleanType AccelerateFunctionImage(Image *image,
3888  const ChannelType channel,const MagickFunction function,
3889  const size_t number_parameters,const double *parameters,
3890  ExceptionInfo *exception)
3891{
3892  MagickBooleanType
3893    status;
3894
3895  assert(image != NULL);
3896  assert(exception != (ExceptionInfo *) NULL);
3897
3898  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
3899      (checkAccelerateCondition(image, channel) == MagickFalse))
3900    return(MagickFalse);
3901
3902  status=ComputeFunctionImage(image, channel, function, number_parameters, parameters, exception);
3903  return(status);
3904}
3905
3906/*
3907%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3908%                                                                             %
3909%                                                                             %
3910%                                                                             %
3911%     A c c e l e r a t e G r a y s c a l e I m a g e                         %
3912%                                                                             %
3913%                                                                             %
3914%                                                                             %
3915%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3916*/
3917
3918static MagickBooleanType ComputeGrayscaleImage(Image *image,
3919  const PixelIntensityMethod method,ExceptionInfo *exception)
3920{
3921  CacheView
3922    *image_view;
3923
3924  cl_command_queue
3925    queue;
3926
3927  cl_context
3928    context;
3929
3930  cl_int
3931    clStatus,
3932    intensityMethod;
3933
3934  cl_int
3935    colorspace;
3936
3937  cl_kernel
3938    grayscaleKernel;
3939
3940  cl_event
3941    event;
3942
3943  cl_mem
3944    imageBuffer;
3945
3946  cl_mem_flags
3947    mem_flags;
3948
3949  MagickBooleanType
3950    outputReady;
3951
3952  MagickCLEnv
3953    clEnv;
3954
3955  MagickSizeType
3956    length;
3957
3958  register ssize_t
3959    i;
3960
3961  void
3962    *inputPixels;
3963
3964  inputPixels = NULL;
3965  imageBuffer = NULL;
3966  grayscaleKernel = NULL;
3967
3968  assert(image != (Image *) NULL);
3969  assert(image->signature == MagickCoreSignature);
3970  if (image->debug != MagickFalse)
3971    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3972
3973  /*
3974   * initialize opencl env
3975   */
3976  clEnv = GetDefaultOpenCLEnv();
3977  context = GetOpenCLContext(clEnv);
3978  queue = AcquireOpenCLCommandQueue(clEnv);
3979
3980  outputReady = MagickFalse;
3981
3982  /* Create and initialize OpenCL buffers.
3983   inputPixels = AcquirePixelCachePixels(image, &length, exception);
3984   assume this  will get a writable image
3985   */
3986  image_view=AcquireAuthenticCacheView(image,exception);
3987  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3988  if (inputPixels == (void *) NULL)
3989  {
3990    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3991    goto cleanup;
3992  }
3993
3994  /* If the host pointer is aligned to the size of CLPixelPacket,
3995   then use the host buffer directly from the GPU; otherwise,
3996   create a buffer on the GPU and copy the data over
3997   */
3998  if (ALIGNED(inputPixels,CLPixelPacket))
3999  {
4000    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
4001  }
4002  else
4003  {
4004    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
4005  }
4006  /* create a CL buffer from image pixel buffer */
4007  length = image->columns * image->rows;
4008  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4009  if (clStatus != CL_SUCCESS)
4010  {
4011    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4012    goto cleanup;
4013  }
4014
4015  intensityMethod = method;
4016  colorspace = image->colorspace;
4017
4018  grayscaleKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Grayscale");
4019  if (grayscaleKernel == NULL)
4020  {
4021    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4022    goto cleanup;
4023  }
4024
4025  i = 0;
4026  clStatus=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4027  clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&intensityMethod);
4028  clStatus|=clEnv->library->clSetKernelArg(grayscaleKernel,i++,sizeof(cl_int),&colorspace);
4029  if (clStatus != CL_SUCCESS)
4030  {
4031    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4032    printf("no kernel\n");
4033    goto cleanup;
4034  }
4035
4036  {
4037    size_t global_work_size[2];
4038    global_work_size[0] = image->columns;
4039    global_work_size[1] = image->rows;
4040    /* launch the kernel */
4041	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, grayscaleKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
4042    if (clStatus != CL_SUCCESS)
4043    {
4044      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4045      goto cleanup;
4046    }
4047    clEnv->library->clFlush(queue);
4048    RecordProfileData(clEnv,GrayScaleKernel,event);
4049    clEnv->library->clReleaseEvent(event);
4050  }
4051
4052  if (ALIGNED(inputPixels,CLPixelPacket))
4053  {
4054    length = image->columns * image->rows;
4055    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4056  }
4057  else
4058  {
4059    length = image->columns * image->rows;
4060    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
4061  }
4062  if (clStatus != CL_SUCCESS)
4063  {
4064    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
4065    goto cleanup;
4066  }
4067
4068  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
4069
4070cleanup:
4071  OpenCLLogException(__FUNCTION__,__LINE__,exception);
4072
4073  image_view=DestroyCacheView(image_view);
4074
4075  if (imageBuffer!=NULL)
4076    clEnv->library->clReleaseMemObject(imageBuffer);
4077  if (grayscaleKernel!=NULL)
4078    RelinquishOpenCLKernel(clEnv, grayscaleKernel);
4079  if (queue != NULL)
4080    RelinquishOpenCLCommandQueue(clEnv, queue);
4081
4082  return( outputReady);
4083}
4084
4085MagickExport MagickBooleanType AccelerateGrayscaleImage(Image* image,
4086  const PixelIntensityMethod method,ExceptionInfo *exception)
4087{
4088  MagickBooleanType
4089    status;
4090
4091  assert(image != NULL);
4092  assert(exception != (ExceptionInfo *) NULL);
4093
4094  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
4095      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
4096    return(MagickFalse);
4097
4098  if (method == Rec601LuminancePixelIntensityMethod || method == Rec709LuminancePixelIntensityMethod)
4099    return(MagickFalse);
4100
4101  if (image->colorspace != sRGBColorspace)
4102    return(MagickFalse);
4103
4104  status=ComputeGrayscaleImage(image,method,exception);
4105  return(status);
4106}
4107
4108/*
4109%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4110%                                                                             %
4111%                                                                             %
4112%                                                                             %
4113%     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
4114%                                                                             %
4115%                                                                             %
4116%                                                                             %
4117%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4118*/
4119
4120static Image *ComputeLocalContrastImage(const Image *image,
4121  const double radius,const double strength,ExceptionInfo *exception)
4122{
4123  CacheView
4124    *filteredImage_view,
4125    *image_view;
4126
4127  cl_command_queue
4128    queue;
4129
4130  cl_context
4131    context;
4132
4133  cl_int
4134    clStatus,
4135    iRadius;
4136
4137  cl_kernel
4138    blurRowKernel,
4139    blurColumnKernel;
4140
4141  cl_event
4142    event;
4143
4144  cl_mem
4145    filteredImageBuffer,
4146    imageBuffer,
4147    imageKernelBuffer,
4148    tempImageBuffer;
4149
4150  cl_mem_flags
4151    mem_flags;
4152
4153  const void
4154    *inputPixels;
4155
4156  Image
4157    *filteredImage;
4158
4159  MagickBooleanType
4160    outputReady;
4161
4162  MagickCLEnv
4163    clEnv;
4164
4165  MagickSizeType
4166    length;
4167
4168  void
4169    *filteredPixels,
4170    *hostPtr;
4171
4172  unsigned int
4173    i,
4174    imageColumns,
4175    imageRows,
4176    passes;
4177
4178  clEnv = NULL;
4179  filteredImage = NULL;
4180  filteredImage_view = NULL;
4181  context = NULL;
4182  imageBuffer = NULL;
4183  filteredImageBuffer = NULL;
4184  tempImageBuffer = NULL;
4185  imageKernelBuffer = NULL;
4186  blurRowKernel = NULL;
4187  blurColumnKernel = NULL;
4188  queue = NULL;
4189  outputReady = MagickFalse;
4190
4191  clEnv = GetDefaultOpenCLEnv();
4192  context = GetOpenCLContext(clEnv);
4193  queue = AcquireOpenCLCommandQueue(clEnv);
4194
4195  /* Create and initialize OpenCL buffers. */
4196  {
4197    image_view=AcquireVirtualCacheView(image,exception);
4198    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
4199    if (inputPixels == (const void *) NULL)
4200    {
4201      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
4202      goto cleanup;
4203    }
4204
4205    /* If the host pointer is aligned to the size of CLPixelPacket,
4206     then use the host buffer directly from the GPU; otherwise,
4207     create a buffer on the GPU and copy the data over */
4208    if (ALIGNED(inputPixels,CLPixelPacket))
4209    {
4210      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4211    }
4212    else
4213    {
4214      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4215    }
4216    /* create a CL buffer from image pixel buffer */
4217    length = image->columns * image->rows;
4218    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4219    if (clStatus != CL_SUCCESS)
4220    {
4221      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4222      goto cleanup;
4223    }
4224  }
4225
4226  /* create output */
4227  {
4228    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
4229    assert(filteredImage != NULL);
4230    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
4231    {
4232      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
4233      goto cleanup;
4234    }
4235    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
4236    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
4237    if (filteredPixels == (void *) NULL)
4238    {
4239      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4240      goto cleanup;
4241    }
4242
4243    if (ALIGNED(filteredPixels,CLPixelPacket))
4244    {
4245      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4246      hostPtr = filteredPixels;
4247    }
4248    else
4249    {
4250      mem_flags = CL_MEM_WRITE_ONLY;
4251      hostPtr = NULL;
4252    }
4253
4254    /* create a CL buffer from image pixel buffer */
4255    length = image->columns * image->rows;
4256    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4257    if (clStatus != CL_SUCCESS)
4258    {
4259      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4260      goto cleanup;
4261    }
4262  }
4263
4264  {
4265    /* create temp buffer */
4266    {
4267      length = image->columns * image->rows;
4268      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
4269      if (clStatus != CL_SUCCESS)
4270      {
4271        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4272        goto cleanup;
4273      }
4274    }
4275
4276    /* get the opencl kernel */
4277    {
4278      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "LocalContrastBlurRow");
4279      if (blurRowKernel == NULL)
4280      {
4281        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4282        goto cleanup;
4283      };
4284
4285      blurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "LocalContrastBlurApplyColumn");
4286      if (blurColumnKernel == NULL)
4287      {
4288        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4289        goto cleanup;
4290      };
4291    }
4292
4293    {
4294      imageColumns = (unsigned int) image->columns;
4295      imageRows = (unsigned int) image->rows;
4296      iRadius = (cl_int) fabs(radius);
4297
4298      passes = ((1.0f * imageColumns) * imageColumns * iRadius) / 4000000000.0f;
4299      passes = (passes < 1) ? 1: passes;
4300
4301      /* set the kernel arguments */
4302      i = 0;
4303      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4304      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4305      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4306      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
4307      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
4308      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
4309
4310      if (clStatus != CL_SUCCESS)
4311      {
4312        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4313        goto cleanup;
4314      }
4315    }
4316
4317    /* launch the kernel */
4318    {
4319      int x;
4320      for (x = 0; x < passes; ++x) {
4321        size_t gsize[2];
4322        size_t wsize[2];
4323        size_t goffset[2];
4324
4325        gsize[0] = 256;
4326        gsize[1] = image->rows / passes;
4327        wsize[0] = 256;
4328        wsize[1] = 1;
4329        goffset[0] = 0;
4330        goffset[1] = x * gsize[1];
4331
4332        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
4333        if (clStatus != CL_SUCCESS)
4334        {
4335          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4336          goto cleanup;
4337        }
4338        RecordProfileData(clEnv,LocalContrastBlurRowKernel,event);
4339        clEnv->library->clReleaseEvent(event);
4340      }
4341    }
4342
4343    {
4344      cl_float FStrength = strength;
4345      i = 0;
4346      clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4347      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4348      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4349      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
4350      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
4351      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
4352      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
4353
4354      if (clStatus != CL_SUCCESS)
4355      {
4356        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4357        goto cleanup;
4358      }
4359    }
4360
4361    /* launch the kernel */
4362    {
4363      int x;
4364      for (x = 0; x < passes; ++x) {
4365        size_t gsize[2];
4366        size_t wsize[2];
4367        size_t goffset[2];
4368
4369        gsize[0] = ((image->columns + 3) / 4) * 4;
4370        gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
4371        wsize[0] = 4;
4372        wsize[1] = 64;
4373        goffset[0] = 0;
4374        goffset[1] = x * gsize[1];
4375
4376        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
4377        if (clStatus != CL_SUCCESS)
4378        {
4379          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4380          goto cleanup;
4381        }
4382        RecordProfileData(clEnv,LocalContrastBlurApplyColumnKernel,event);
4383        clEnv->library->clReleaseEvent(event);
4384      }
4385    }
4386  }
4387
4388  /* get result */
4389  if (ALIGNED(filteredPixels,CLPixelPacket))
4390  {
4391    length = image->columns * image->rows;
4392    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4393  }
4394  else
4395  {
4396    length = image->columns * image->rows;
4397    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4398  }
4399  if (clStatus != CL_SUCCESS)
4400  {
4401    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
4402    goto cleanup;
4403  }
4404
4405  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
4406
4407cleanup:
4408  OpenCLLogException(__FUNCTION__,__LINE__,exception);
4409
4410  image_view=DestroyCacheView(image_view);
4411  if (filteredImage_view != NULL)
4412    filteredImage_view=DestroyCacheView(filteredImage_view);
4413
4414  if (imageBuffer!=NULL)                      clEnv->library->clReleaseMemObject(imageBuffer);
4415  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
4416  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
4417  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
4418  if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
4419  if (blurColumnKernel!=NULL)                 RelinquishOpenCLKernel(clEnv, blurColumnKernel);
4420  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
4421  if (outputReady == MagickFalse)
4422  {
4423    if (filteredImage != NULL)
4424    {
4425      DestroyImage(filteredImage);
4426      filteredImage = NULL;
4427    }
4428  }
4429  return(filteredImage);
4430}
4431
4432MagickExport Image *AccelerateLocalContrastImage(const Image *image,
4433  const double radius,const double strength,ExceptionInfo *exception)
4434{
4435  Image
4436    *filteredImage;
4437
4438  assert(image != NULL);
4439  assert(exception != (ExceptionInfo *) NULL);
4440
4441  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
4442    (checkAccelerateCondition(image, AllChannels) == MagickFalse))
4443    return NULL;
4444
4445  filteredImage=ComputeLocalContrastImage(image,radius,strength,exception);
4446
4447  return(filteredImage);
4448}
4449
4450/*
4451%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4452%                                                                             %
4453%                                                                             %
4454%                                                                             %
4455%     A c c e l e r a t e M o d u l a t e I m a g e                           %
4456%                                                                             %
4457%                                                                             %
4458%                                                                             %
4459%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4460*/
4461
4462static MagickBooleanType ComputeModulateImage(Image *image,
4463  double percent_brightness,double percent_hue,double percent_saturation,
4464  ColorspaceType colorspace,ExceptionInfo *exception)
4465{
4466  CacheView
4467    *image_view;
4468
4469  cl_float
4470    bright,
4471    hue,
4472    saturation;
4473
4474  cl_context
4475    context;
4476
4477  cl_command_queue
4478    queue;
4479
4480  cl_int
4481    color,
4482    clStatus;
4483
4484  cl_kernel
4485    modulateKernel;
4486
4487  cl_event
4488    event;
4489
4490  cl_mem
4491    imageBuffer;
4492
4493  cl_mem_flags
4494    mem_flags;
4495
4496  MagickBooleanType
4497    outputReady;
4498
4499  MagickCLEnv
4500    clEnv;
4501
4502  MagickSizeType
4503    length;
4504
4505  register ssize_t
4506    i;
4507
4508  void
4509    *inputPixels;
4510
4511  inputPixels = NULL;
4512  imageBuffer = NULL;
4513  modulateKernel = NULL;
4514
4515  assert(image != (Image *) NULL);
4516  assert(image->signature == MagickCoreSignature);
4517  if (image->debug != MagickFalse)
4518    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4519
4520  /*
4521   * initialize opencl env
4522   */
4523  clEnv = GetDefaultOpenCLEnv();
4524  context = GetOpenCLContext(clEnv);
4525  queue = AcquireOpenCLCommandQueue(clEnv);
4526
4527  outputReady = MagickFalse;
4528
4529  /* Create and initialize OpenCL buffers.
4530   inputPixels = AcquirePixelCachePixels(image, &length, exception);
4531   assume this  will get a writable image
4532   */
4533  image_view=AcquireAuthenticCacheView(image,exception);
4534  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
4535  if (inputPixels == (void *) NULL)
4536  {
4537    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
4538    goto cleanup;
4539  }
4540
4541  /* If the host pointer is aligned to the size of CLPixelPacket,
4542   then use the host buffer directly from the GPU; otherwise,
4543   create a buffer on the GPU and copy the data over
4544   */
4545  if (ALIGNED(inputPixels,CLPixelPacket))
4546  {
4547    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
4548  }
4549  else
4550  {
4551    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
4552  }
4553  /* create a CL buffer from image pixel buffer */
4554  length = image->columns * image->rows;
4555  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4556  if (clStatus != CL_SUCCESS)
4557  {
4558    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
4559    goto cleanup;
4560  }
4561
4562  modulateKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "Modulate");
4563  if (modulateKernel == NULL)
4564  {
4565    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
4566    goto cleanup;
4567  }
4568
4569  bright=percent_brightness;
4570  hue=percent_hue;
4571  saturation=percent_saturation;
4572  color=colorspace;
4573
4574  i = 0;
4575  clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4576  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
4577  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
4578  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
4579  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
4580  if (clStatus != CL_SUCCESS)
4581  {
4582    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4583    printf("no kernel\n");
4584    goto cleanup;
4585  }
4586
4587  {
4588    size_t global_work_size[2];
4589    global_work_size[0] = image->columns;
4590    global_work_size[1] = image->rows;
4591    /* launch the kernel */
4592	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
4593    if (clStatus != CL_SUCCESS)
4594    {
4595      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4596      goto cleanup;
4597    }
4598    clEnv->library->clFlush(queue);
4599    RecordProfileData(clEnv,ModulateKernel,event);
4600    clEnv->library->clReleaseEvent(event);
4601  }
4602
4603  if (ALIGNED(inputPixels,CLPixelPacket))
4604  {
4605    length = image->columns * image->rows;
4606    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
4607  }
4608  else
4609  {
4610    length = image->columns * image->rows;
4611    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
4612  }
4613  if (clStatus != CL_SUCCESS)
4614  {
4615    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
4616    goto cleanup;
4617  }
4618
4619  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
4620
4621cleanup:
4622  OpenCLLogException(__FUNCTION__,__LINE__,exception);
4623
4624  image_view=DestroyCacheView(image_view);
4625
4626  if (imageBuffer!=NULL)
4627    clEnv->library->clReleaseMemObject(imageBuffer);
4628  if (modulateKernel!=NULL)
4629    RelinquishOpenCLKernel(clEnv, modulateKernel);
4630  if (queue != NULL)
4631    RelinquishOpenCLCommandQueue(clEnv, queue);
4632
4633  return outputReady;
4634
4635}
4636
4637MagickExport MagickBooleanType AccelerateModulateImage(Image *image,
4638  double percent_brightness,double percent_hue,double percent_saturation,
4639  ColorspaceType colorspace,ExceptionInfo *exception)
4640{
4641  MagickBooleanType
4642    status;
4643
4644  assert(image != NULL);
4645  assert(exception != (ExceptionInfo *) NULL);
4646
4647  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
4648      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
4649    return(MagickFalse);
4650
4651  if ((colorspace != HSLColorspace && colorspace != UndefinedColorspace))
4652    return(MagickFalse);
4653
4654  status = ComputeModulateImage(image,percent_brightness, percent_hue, percent_saturation, colorspace, exception);
4655  return(status);
4656}
4657
4658/*
4659%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4660%                                                                             %
4661%                                                                             %
4662%                                                                             %
4663%     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
4664%                                                                             %
4665%                                                                             %
4666%                                                                             %
4667%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4668*/
4669
4670static Image* ComputeMotionBlurImage(const Image *image,
4671  const ChannelType channel,const double *kernel,const size_t width,
4672  const OffsetInfo *offset,ExceptionInfo *exception)
4673{
4674  CacheView
4675    *filteredImage_view,
4676    *image_view;
4677
4678  cl_command_queue
4679    queue;
4680
4681  cl_context
4682    context;
4683
4684  cl_float4
4685    biasPixel;
4686
4687  cl_int
4688    clStatus;
4689
4690  cl_kernel
4691    motionBlurKernel;
4692
4693  cl_event
4694    event;
4695
4696  cl_mem
4697    filteredImageBuffer,
4698    imageBuffer,
4699    imageKernelBuffer,
4700    offsetBuffer;
4701
4702  cl_mem_flags
4703    mem_flags;
4704
4705  const void
4706    *inputPixels;
4707
4708  float
4709    *kernelBufferPtr;
4710
4711  Image
4712    *filteredImage;
4713
4714  int
4715    *offsetBufferPtr;
4716
4717  MagickBooleanType
4718    outputReady;
4719
4720  MagickCLEnv
4721   clEnv;
4722
4723  PixelInfo
4724    bias;
4725
4726  MagickSizeType
4727    length;
4728
4729  size_t
4730    global_work_size[2],
4731    local_work_size[2];
4732
4733  unsigned int
4734    i,
4735    imageHeight,
4736    imageWidth,
4737    matte;
4738
4739  void
4740    *filteredPixels,
4741    *hostPtr;
4742
4743  outputReady = MagickFalse;
4744  context = NULL;
4745  filteredImage = NULL;
4746  filteredImage_view = NULL;
4747  imageBuffer = NULL;
4748  filteredImageBuffer = NULL;
4749  imageKernelBuffer = NULL;
4750  motionBlurKernel = NULL;
4751  queue = NULL;
4752
4753  clEnv = GetDefaultOpenCLEnv();
4754  context = GetOpenCLContext(clEnv);
4755
4756  /* Create and initialize OpenCL buffers. */
4757
4758  image_view=AcquireVirtualCacheView(image,exception);
4759  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
4760  if (inputPixels == (const void *) NULL)
4761  {
4762    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
4763      "UnableToReadPixelCache.","`%s'",image->filename);
4764    goto cleanup;
4765  }
4766
4767  // If the host pointer is aligned to the size of CLPixelPacket,
4768  // then use the host buffer directly from the GPU; otherwise,
4769  // create a buffer on the GPU and copy the data over
4770  if (ALIGNED(inputPixels,CLPixelPacket))
4771  {
4772    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
4773  }
4774  else
4775  {
4776    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
4777  }
4778  // create a CL buffer from image pixel buffer
4779  length = image->columns * image->rows;
4780  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
4781    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
4782  if (clStatus != CL_SUCCESS)
4783  {
4784    (void) ThrowMagickException(exception, GetMagickModule(),
4785      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
4786    goto cleanup;
4787  }
4788
4789
4790  filteredImage = CloneImage(image,image->columns,image->rows,
4791    MagickTrue,exception);
4792  assert(filteredImage != NULL);
4793  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
4794  {
4795    (void) ThrowMagickException(exception, GetMagickModule(),
4796      ResourceLimitError, "CloneImage failed.", "'%s'", ".");
4797    goto cleanup;
4798  }
4799  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
4800  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
4801  if (filteredPixels == (void *) NULL)
4802  {
4803    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
4804      "UnableToReadPixelCache.","`%s'",filteredImage->filename);
4805    goto cleanup;
4806  }
4807
4808  if (ALIGNED(filteredPixels,CLPixelPacket))
4809  {
4810    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
4811    hostPtr = filteredPixels;
4812  }
4813  else
4814  {
4815    mem_flags = CL_MEM_WRITE_ONLY;
4816    hostPtr = NULL;
4817  }
4818  // create a CL buffer from image pixel buffer
4819  length = image->columns * image->rows;
4820  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags,
4821    length * sizeof(CLPixelPacket), hostPtr, &clStatus);
4822  if (clStatus != CL_SUCCESS)
4823  {
4824    (void) ThrowMagickException(exception, GetMagickModule(),
4825      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
4826    goto cleanup;
4827  }
4828
4829
4830  imageKernelBuffer = clEnv->library->clCreateBuffer(context,
4831    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
4832    &clStatus);
4833  if (clStatus != CL_SUCCESS)
4834  {
4835    (void) ThrowMagickException(exception, GetMagickModule(),
4836      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
4837    goto cleanup;
4838  }
4839
4840  queue = AcquireOpenCLCommandQueue(clEnv);
4841  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
4842    CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
4843  if (clStatus != CL_SUCCESS)
4844  {
4845    (void) ThrowMagickException(exception, GetMagickModule(),
4846      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
4847    goto cleanup;
4848  }
4849  for (i = 0; i < width; i++)
4850  {
4851    kernelBufferPtr[i] = (float) kernel[i];
4852  }
4853  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
4854    0, NULL, NULL);
4855 if (clStatus != CL_SUCCESS)
4856  {
4857    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
4858      "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
4859    goto cleanup;
4860  }
4861
4862  offsetBuffer = clEnv->library->clCreateBuffer(context,
4863    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
4864    &clStatus);
4865  if (clStatus != CL_SUCCESS)
4866  {
4867    (void) ThrowMagickException(exception, GetMagickModule(),
4868      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
4869    goto cleanup;
4870  }
4871
4872  offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
4873    CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
4874  if (clStatus != CL_SUCCESS)
4875  {
4876    (void) ThrowMagickException(exception, GetMagickModule(),
4877      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
4878    goto cleanup;
4879  }
4880  for (i = 0; i < width; i++)
4881  {
4882    offsetBufferPtr[2*i] = (int)offset[i].x;
4883    offsetBufferPtr[2*i+1] = (int)offset[i].y;
4884  }
4885  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
4886    NULL, NULL);
4887 if (clStatus != CL_SUCCESS)
4888  {
4889    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
4890      "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
4891    goto cleanup;
4892  }
4893
4894
4895 // get the OpenCL kernel
4896  motionBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE,
4897    "MotionBlur");
4898  if (motionBlurKernel == NULL)
4899  {
4900    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
4901      "AcquireOpenCLKernel failed.", "'%s'", ".");
4902    goto cleanup;
4903  }
4904
4905  // set the kernel arguments
4906  i = 0;
4907  clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
4908    (void *)&imageBuffer);
4909  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
4910    (void *)&filteredImageBuffer);
4911  imageWidth = (unsigned int) image->columns;
4912  imageHeight = (unsigned int) image->rows;
4913  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
4914    &imageWidth);
4915  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
4916    &imageHeight);
4917  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
4918    (void *)&imageKernelBuffer);
4919  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
4920    &width);
4921  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
4922    (void *)&offsetBuffer);
4923
4924  GetPixelInfo(image,&bias);
4925  biasPixel.s[0] = bias.red;
4926  biasPixel.s[1] = bias.green;
4927  biasPixel.s[2] = bias.blue;
4928  biasPixel.s[3] = bias.alpha;
4929  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
4930
4931  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &channel);
4932  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
4933  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
4934  if (clStatus != CL_SUCCESS)
4935  {
4936    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
4937      "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
4938    goto cleanup;
4939  }
4940
4941  // launch the kernel
4942  local_work_size[0] = 16;
4943  local_work_size[1] = 16;
4944  global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
4945                                (unsigned int) image->columns,(unsigned int) local_work_size[0]);
4946  global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
4947                                (unsigned int) image->rows,(unsigned int) local_work_size[1]);
4948  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
4949	  global_work_size, local_work_size, 0, NULL, &event);
4950
4951  if (clStatus != CL_SUCCESS)
4952  {
4953    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
4954      "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
4955    goto cleanup;
4956  }
4957  clEnv->library->clFlush(queue);
4958  RecordProfileData(clEnv,MotionBlurKernel,event);
4959  clEnv->library->clReleaseEvent(event);
4960
4961  if (ALIGNED(filteredPixels,CLPixelPacket))
4962  {
4963    length = image->columns * image->rows;
4964    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
4965      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
4966      NULL, &clStatus);
4967  }
4968  else
4969  {
4970    length = image->columns * image->rows;
4971    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
4972      length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
4973  }
4974  if (clStatus != CL_SUCCESS)
4975  {
4976    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
4977      "Reading output image from CL buffer failed.", "'%s'", ".");
4978    goto cleanup;
4979  }
4980  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
4981
4982cleanup:
4983
4984  image_view=DestroyCacheView(image_view);
4985  if (filteredImage_view != NULL)
4986    filteredImage_view=DestroyCacheView(filteredImage_view);
4987
4988  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
4989  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
4990  if (imageKernelBuffer!=NULL)    clEnv->library->clReleaseMemObject(imageKernelBuffer);
4991  if (motionBlurKernel!=NULL)  RelinquishOpenCLKernel(clEnv, motionBlurKernel);
4992  if (queue != NULL)           RelinquishOpenCLCommandQueue(clEnv, queue);
4993  if (outputReady == MagickFalse && filteredImage != NULL)
4994    filteredImage=DestroyImage(filteredImage);
4995
4996  return(filteredImage);
4997}
4998
4999MagickExport Image *AccelerateMotionBlurImage(const Image *image,
5000  const ChannelType channel,const double* kernel,const size_t width,
5001  const OffsetInfo *offset,ExceptionInfo *exception)
5002{
5003  Image
5004    *filteredImage;
5005
5006  assert(image != NULL);
5007  assert(kernel != (double *) NULL);
5008  assert(offset != (OffsetInfo *) NULL);
5009  assert(exception != (ExceptionInfo *) NULL);
5010
5011  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
5012      (checkAccelerateCondition(image, channel) == MagickFalse))
5013    return NULL;
5014
5015  filteredImage=ComputeMotionBlurImage(image, channel, kernel, width,
5016    offset, exception);
5017  return(filteredImage);
5018}
5019
5020/*
5021%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5022%                                                                             %
5023%                                                                             %
5024%                                                                             %
5025%     A c c e l e r a t e R a n d o m I m a g e                               %
5026%                                                                             %
5027%                                                                             %
5028%                                                                             %
5029%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5030*/
5031
5032static MagickBooleanType LaunchRandomImageKernel(MagickCLEnv clEnv,
5033  cl_command_queue queue,cl_mem imageBuffer,const unsigned int imageColumns,
5034  const unsigned int imageRows,cl_mem seedBuffer,
5035  const unsigned int numGenerators,ExceptionInfo *exception)
5036{
5037  int
5038    k;
5039
5040  cl_int
5041    clStatus;
5042
5043  cl_kernel
5044    randomImageKernel;
5045
5046  cl_event
5047    event;
5048
5049  MagickBooleanType
5050    status;
5051
5052  size_t
5053    global_work_size,
5054    local_work_size;
5055
5056  status = MagickFalse;
5057  randomImageKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RandomNumberGenerator");
5058
5059  k = 0;
5060  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&imageBuffer);
5061  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageColumns);
5062  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_uint),(void*)&imageRows);
5063  clEnv->library->clSetKernelArg(randomImageKernel,k++,sizeof(cl_mem),(void*)&seedBuffer);
5064  {
5065    const float randNormNumerator = 1.0f;
5066    const unsigned int randNormDenominator = (unsigned int)(~0UL);
5067    clEnv->library->clSetKernelArg(randomImageKernel,k++,
5068          sizeof(float),(void*)&randNormNumerator);
5069    clEnv->library->clSetKernelArg(randomImageKernel,k++,
5070          sizeof(cl_uint),(void*)&randNormDenominator);
5071  }
5072
5073
5074  global_work_size = numGenerators;
5075  local_work_size = 64;
5076
5077  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue,randomImageKernel,1,NULL,&global_work_size,
5078	  &local_work_size, 0, NULL, &event);
5079
5080  if (clStatus != CL_SUCCESS)
5081  {
5082    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning,
5083                                      "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5084    goto cleanup;
5085  }
5086  RecordProfileData(clEnv,RandomNumberGeneratorKernel,event);
5087  clEnv->library->clReleaseEvent(event);
5088
5089  status = MagickTrue;
5090
5091cleanup:
5092  if (randomImageKernel!=NULL) RelinquishOpenCLKernel(clEnv, randomImageKernel);
5093  return(status);
5094}
5095
5096static MagickBooleanType ComputeRandomImage(Image* image,
5097  ExceptionInfo* exception)
5098{
5099  CacheView
5100    *image_view;
5101
5102  cl_command_queue
5103    queue;
5104
5105  cl_context
5106    context;
5107
5108  cl_int
5109    clStatus;
5110
5111  /* Don't release this buffer in this function !!! */
5112  cl_mem
5113    randomNumberSeedsBuffer;
5114
5115  cl_mem_flags
5116    mem_flags;
5117
5118  cl_mem
5119   imageBuffer;
5120
5121  MagickBooleanType
5122    outputReady,
5123    status;
5124
5125  MagickCLEnv
5126    clEnv;
5127
5128  MagickSizeType
5129    length;
5130
5131  void
5132    *inputPixels;
5133
5134  status = MagickFalse;
5135  outputReady = MagickFalse;
5136  inputPixels = NULL;
5137  context = NULL;
5138  imageBuffer = NULL;
5139  queue = NULL;
5140
5141  clEnv = GetDefaultOpenCLEnv();
5142  context = GetOpenCLContext(clEnv);
5143
5144  /* Create and initialize OpenCL buffers. */
5145  image_view=AcquireAuthenticCacheView(image,exception);
5146  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
5147  if (inputPixels == (void *) NULL)
5148  {
5149    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
5150    goto cleanup;
5151  }
5152
5153  /* If the host pointer is aligned to the size of CLPixelPacket,
5154     then use the host buffer directly from the GPU; otherwise,
5155     create a buffer on the GPU and copy the data over */
5156  if (ALIGNED(inputPixels,CLPixelPacket))
5157  {
5158    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
5159  }
5160  else
5161  {
5162    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
5163  }
5164  /* create a CL buffer from image pixel buffer */
5165  length = image->columns * image->rows;
5166  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
5167  if (clStatus != CL_SUCCESS)
5168  {
5169    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5170    goto cleanup;
5171  }
5172
5173  queue = AcquireOpenCLCommandQueue(clEnv);
5174
5175  randomNumberSeedsBuffer = GetAndLockRandSeedBuffer(clEnv);
5176  if (randomNumberSeedsBuffer==NULL)
5177  {
5178    (void) OpenCLThrowMagickException(exception, GetMagickModule(),
5179           ResourceLimitWarning, "Failed to get GPU random number generators.",
5180           "'%s'", ".");
5181    goto cleanup;
5182  }
5183
5184  status = LaunchRandomImageKernel(clEnv,queue,
5185                                   imageBuffer,
5186                                   (unsigned int) image->columns,
5187                                   (unsigned int) image->rows,
5188                                   randomNumberSeedsBuffer,
5189                                   GetNumRandGenerators(clEnv),
5190                                   exception);
5191  if (status==MagickFalse)
5192  {
5193    goto cleanup;
5194  }
5195
5196  if (ALIGNED(inputPixels,CLPixelPacket))
5197  {
5198    length = image->columns * image->rows;
5199    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
5200  }
5201  else
5202  {
5203    length = image->columns * image->rows;
5204    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
5205  }
5206  if (clStatus != CL_SUCCESS)
5207  {
5208    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
5209    goto cleanup;
5210  }
5211  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
5212
5213cleanup:
5214  OpenCLLogException(__FUNCTION__,__LINE__,exception);
5215
5216  image_view=DestroyCacheView(image_view);
5217
5218  UnlockRandSeedBuffer(clEnv);
5219  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
5220  if (queue != NULL)                  RelinquishOpenCLCommandQueue(clEnv, queue);
5221  return outputReady;
5222}
5223
5224MagickExport MagickBooleanType AccelerateRandomImage(Image *image,
5225  ExceptionInfo* exception)
5226{
5227  MagickBooleanType
5228    status;
5229
5230  assert(image != NULL);
5231  assert(exception != (ExceptionInfo *) NULL);
5232
5233  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
5234      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
5235    return(MagickFalse);
5236
5237  status=ComputeRandomImage(image,exception);
5238  return(status);
5239}
5240
5241/*
5242%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5243%                                                                             %
5244%                                                                             %
5245%                                                                             %
5246%     A c c e l e r a t e R e s i z e I m a g e                               %
5247%                                                                             %
5248%                                                                             %
5249%                                                                             %
5250%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5251*/
5252
5253static MagickBooleanType resizeHorizontalFilter(cl_mem image,
5254  const unsigned int imageColumns,const unsigned int imageRows,
5255  const unsigned int matte,cl_mem resizedImage,
5256  const unsigned int resizedColumns,const unsigned int resizedRows,
5257  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
5258  const float xFactor,MagickCLEnv clEnv,cl_command_queue queue,
5259  ExceptionInfo *exception)
5260{
5261  cl_kernel
5262    horizontalKernel;
5263
5264  cl_event
5265    event;
5266
5267  cl_int clStatus;
5268
5269  const unsigned int
5270    workgroupSize = 256;
5271
5272  float
5273    resizeFilterScale,
5274    resizeFilterSupport,
5275    resizeFilterWindowSupport,
5276    resizeFilterBlur,
5277    scale,
5278    support;
5279
5280  int
5281    cacheRangeStart,
5282    cacheRangeEnd,
5283    numCachedPixels,
5284    resizeFilterType,
5285    resizeWindowType;
5286
5287  MagickBooleanType
5288    status = MagickFalse;
5289
5290  size_t
5291    deviceLocalMemorySize,
5292    gammaAccumulatorLocalMemorySize,
5293    global_work_size[2],
5294    imageCacheLocalMemorySize,
5295    pixelAccumulatorLocalMemorySize,
5296    local_work_size[2],
5297    totalLocalMemorySize,
5298    weightAccumulatorLocalMemorySize;
5299
5300  unsigned int
5301    chunkSize,
5302    i,
5303    pixelPerWorkgroup;
5304
5305  horizontalKernel = NULL;
5306  status = MagickFalse;
5307
5308  /*
5309  Apply filter to resize vertically from image to resize image.
5310  */
5311  scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
5312  support=scale*GetResizeFilterSupport(resizeFilter);
5313  if (support < 0.5)
5314  {
5315    /*
5316    Support too small even for nearest neighbour: Reduce to point
5317    sampling.
5318    */
5319    support=(MagickRealType) 0.5;
5320    scale=1.0;
5321  }
5322  scale=PerceptibleReciprocal(scale);
5323
5324  if (resizedColumns < workgroupSize)
5325  {
5326    chunkSize = 32;
5327    pixelPerWorkgroup = 32;
5328  }
5329  else
5330  {
5331    chunkSize = workgroupSize;
5332    pixelPerWorkgroup = workgroupSize;
5333  }
5334
5335  /* get the local memory size supported by the device */
5336  deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
5337
5338DisableMSCWarning(4127)
5339  while(1)
5340RestoreMSCWarning
5341  {
5342    /* calculate the local memory size needed per workgroup */
5343    cacheRangeStart = (int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
5344    cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+MagickEpsilon)+support+0.5);
5345    numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
5346    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
5347    totalLocalMemorySize = imageCacheLocalMemorySize;
5348
5349    /* local size for the pixel accumulator */
5350    pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
5351    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
5352
5353    /* local memory size for the weight accumulator */
5354    weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
5355    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
5356
5357    /* local memory size for the gamma accumulator */
5358    if (matte == 0)
5359      gammaAccumulatorLocalMemorySize = sizeof(float);
5360    else
5361      gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
5362    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
5363
5364    if (totalLocalMemorySize <= deviceLocalMemorySize)
5365      break;
5366    else
5367    {
5368      pixelPerWorkgroup = pixelPerWorkgroup/2;
5369      chunkSize = chunkSize/2;
5370      if (pixelPerWorkgroup == 0
5371          || chunkSize == 0)
5372      {
5373        /* quit, fallback to CPU */
5374        goto cleanup;
5375      }
5376    }
5377  }
5378
5379  resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
5380  resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
5381
5382
5383  if (resizeFilterType == SincFastWeightingFunction
5384    && resizeWindowType == SincFastWeightingFunction)
5385  {
5386    horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilterSinc");
5387  }
5388  else
5389  {
5390    horizontalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeHorizontalFilter");
5391  }
5392  if (horizontalKernel == NULL)
5393  {
5394    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
5395    goto cleanup;
5396  }
5397
5398  i = 0;
5399  clStatus = clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&image);
5400  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
5401  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
5402  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&matte);
5403  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&xFactor);
5404  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
5405
5406  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
5407  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
5408
5409  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeFilterType);
5410  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), (void*)&resizeWindowType);
5411  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
5412
5413  resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
5414  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
5415
5416  resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
5417  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
5418
5419  resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
5420  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
5421
5422  resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
5423  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
5424
5425
5426  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, imageCacheLocalMemorySize, NULL);
5427  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(int), &numCachedPixels);
5428  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
5429  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, sizeof(unsigned int), &chunkSize);
5430
5431
5432  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
5433  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
5434  clStatus |= clEnv->library->clSetKernelArg(horizontalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
5435
5436  if (clStatus != CL_SUCCESS)
5437  {
5438    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5439    goto cleanup;
5440  }
5441
5442  global_work_size[0] = (resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
5443  global_work_size[1] = resizedRows;
5444
5445  local_work_size[0] = workgroupSize;
5446  local_work_size[1] = 1;
5447  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, horizontalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);
5448  (void) local_work_size;
5449  if (clStatus != CL_SUCCESS)
5450  {
5451    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
5452    goto cleanup;
5453  }
5454  clEnv->library->clFlush(queue);
5455  RecordProfileData(clEnv,ResizeHorizontalKernel,event);
5456  clEnv->library->clReleaseEvent(event);
5457  status = MagickTrue;
5458
5459
5460cleanup:
5461  OpenCLLogException(__FUNCTION__,__LINE__,exception);
5462
5463  if (horizontalKernel != NULL) RelinquishOpenCLKernel(clEnv, horizontalKernel);
5464
5465  return(status);
5466}
5467
5468static MagickBooleanType resizeVerticalFilter(cl_mem image,
5469  const unsigned int imageColumns,const unsigned int imageRows,
5470  const unsigned int matte,cl_mem resizedImage,
5471  const unsigned int resizedColumns,const unsigned int resizedRows,
5472  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
5473  const float yFactor,MagickCLEnv clEnv,cl_command_queue queue,
5474  ExceptionInfo *exception)
5475{
5476  cl_kernel
5477    verticalKernel;
5478
5479  cl_event
5480    event;
5481
5482  cl_int clStatus;
5483
5484  const unsigned int
5485    workgroupSize = 256;
5486
5487  float
5488    resizeFilterScale,
5489    resizeFilterSupport,
5490    resizeFilterWindowSupport,
5491    resizeFilterBlur,
5492    scale,
5493    support;
5494
5495  int
5496    cacheRangeStart,
5497    cacheRangeEnd,
5498    numCachedPixels,
5499    resizeFilterType,
5500    resizeWindowType;
5501
5502  MagickBooleanType
5503    status = MagickFalse;
5504
5505  size_t
5506    deviceLocalMemorySize,
5507    gammaAccumulatorLocalMemorySize,
5508    global_work_size[2],
5509    imageCacheLocalMemorySize,
5510    pixelAccumulatorLocalMemorySize,
5511    local_work_size[2],
5512    totalLocalMemorySize,
5513    weightAccumulatorLocalMemorySize;
5514
5515  unsigned int
5516    chunkSize,
5517    i,
5518    pixelPerWorkgroup;
5519
5520  verticalKernel = NULL;
5521  status = MagickFalse;
5522
5523  /*
5524  Apply filter to resize vertically from image to resize image.
5525  */
5526  scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
5527  support=scale*GetResizeFilterSupport(resizeFilter);
5528  if (support < 0.5)
5529  {
5530    /*
5531    Support too small even for nearest neighbour: Reduce to point
5532    sampling.
5533    */
5534    support=(MagickRealType) 0.5;
5535    scale=1.0;
5536  }
5537  scale=PerceptibleReciprocal(scale);
5538
5539  if (resizedRows < workgroupSize)
5540  {
5541    chunkSize = 32;
5542    pixelPerWorkgroup = 32;
5543  }
5544  else
5545  {
5546    chunkSize = workgroupSize;
5547    pixelPerWorkgroup = workgroupSize;
5548  }
5549
5550  /* get the local memory size supported by the device */
5551  deviceLocalMemorySize = GetOpenCLDeviceLocalMemorySize(clEnv);
5552
5553DisableMSCWarning(4127)
5554  while(1)
5555RestoreMSCWarning
5556  {
5557    /* calculate the local memory size needed per workgroup */
5558    cacheRangeStart = (int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
5559    cacheRangeEnd = (int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+MagickEpsilon)+support+0.5);
5560    numCachedPixels = cacheRangeEnd - cacheRangeStart + 1;
5561    imageCacheLocalMemorySize = numCachedPixels * sizeof(CLPixelPacket);
5562    totalLocalMemorySize = imageCacheLocalMemorySize;
5563
5564    /* local size for the pixel accumulator */
5565    pixelAccumulatorLocalMemorySize = chunkSize * sizeof(cl_float4);
5566    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
5567
5568    /* local memory size for the weight accumulator */
5569    weightAccumulatorLocalMemorySize = chunkSize * sizeof(float);
5570    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
5571
5572    /* local memory size for the gamma accumulator */
5573    if (matte == 0)
5574      gammaAccumulatorLocalMemorySize = sizeof(float);
5575    else
5576      gammaAccumulatorLocalMemorySize = chunkSize * sizeof(float);
5577    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
5578
5579    if (totalLocalMemorySize <= deviceLocalMemorySize)
5580      break;
5581    else
5582    {
5583      pixelPerWorkgroup = pixelPerWorkgroup/2;
5584      chunkSize = chunkSize/2;
5585      if (pixelPerWorkgroup == 0
5586          || chunkSize == 0)
5587      {
5588        /* quit, fallback to CPU */
5589        goto cleanup;
5590      }
5591    }
5592  }
5593
5594  resizeFilterType = (int)GetResizeFilterWeightingType(resizeFilter);
5595  resizeWindowType = (int)GetResizeFilterWindowWeightingType(resizeFilter);
5596
5597  if (resizeFilterType == SincFastWeightingFunction
5598    && resizeWindowType == SincFastWeightingFunction)
5599    verticalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilterSinc");
5600  else
5601    verticalKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "ResizeVerticalFilter");
5602
5603  if (verticalKernel == NULL)
5604  {
5605    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
5606    goto cleanup;
5607  }
5608
5609  i = 0;
5610  clStatus = clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&image);
5611  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageColumns);
5612  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&imageRows);
5613  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&matte);
5614  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&yFactor);
5615  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&resizedImage);
5616
5617  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedColumns);
5618  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), (void*)&resizedRows);
5619
5620  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeFilterType);
5621  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), (void*)&resizeWindowType);
5622  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(cl_mem), (void*)&resizeFilterCubicCoefficients);
5623
5624  resizeFilterScale = (float) GetResizeFilterScale(resizeFilter);
5625  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&resizeFilterScale);
5626
5627  resizeFilterSupport = (float) GetResizeFilterSupport(resizeFilter);
5628  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&resizeFilterSupport);
5629
5630  resizeFilterWindowSupport = (float) GetResizeFilterWindowSupport(resizeFilter);
5631  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&resizeFilterWindowSupport);
5632
5633  resizeFilterBlur = (float) GetResizeFilterBlur(resizeFilter);
5634  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(float), (void*)&resizeFilterBlur);
5635
5636
5637  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, imageCacheLocalMemorySize, NULL);
5638  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(int), &numCachedPixels);
5639  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), &pixelPerWorkgroup);
5640  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, sizeof(unsigned int), &chunkSize);
5641
5642
5643  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, pixelAccumulatorLocalMemorySize, NULL);
5644  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, weightAccumulatorLocalMemorySize, NULL);
5645  clStatus |= clEnv->library->clSetKernelArg(verticalKernel, i++, gammaAccumulatorLocalMemorySize, NULL);
5646
5647  if (clStatus != CL_SUCCESS)
5648  {
5649    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
5650    goto cleanup;
5651  }
5652
5653  global_work_size[0] = resizedColumns;
5654  global_work_size[1] = (resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*workgroupSize;
5655
5656  local_work_size[0] = 1;
5657  local_work_size[1] = workgroupSize;
5658  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, verticalKernel, 2, NULL, global_work_size, local_work_size, 0, NULL, &event);
5659  if (clStatus != CL_SUCCESS)
5660  {
5661    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
5662    goto cleanup;
5663  }
5664  clEnv->library->clFlush(queue);
5665  RecordProfileData(clEnv,ResizeVerticalKernel,event);
5666  clEnv->library->clReleaseEvent(event);
5667  status = MagickTrue;
5668
5669
5670cleanup:
5671  OpenCLLogException(__FUNCTION__,__LINE__,exception);
5672
5673  if (verticalKernel != NULL) RelinquishOpenCLKernel(clEnv, verticalKernel);
5674
5675  return(status);
5676}
5677
5678static Image *ComputeResizeImage(const Image* image,
5679  const size_t resizedColumns,const size_t resizedRows,
5680  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
5681{
5682  CacheView
5683    *filteredImage_view,
5684    *image_view;
5685
5686  cl_command_queue
5687    queue;
5688
5689  cl_int
5690    clStatus;
5691
5692  cl_context
5693    context;
5694
5695  cl_mem
5696    cubicCoefficientsBuffer,
5697    filteredImageBuffer,
5698    imageBuffer,
5699    tempImageBuffer;
5700
5701  cl_mem_flags
5702    mem_flags;
5703
5704  const double
5705    *resizeFilterCoefficient;
5706
5707  const void
5708    *inputPixels;
5709
5710  float
5711    *mappedCoefficientBuffer,
5712    xFactor,
5713    yFactor;
5714
5715  MagickBooleanType
5716    outputReady,
5717    status;
5718
5719  MagickCLEnv
5720    clEnv;
5721
5722  MagickSizeType
5723    length;
5724
5725  Image
5726    *filteredImage;
5727
5728  unsigned int
5729    i,
5730    matte;
5731
5732  void
5733    *filteredPixels,
5734    *hostPtr;
5735
5736  outputReady = MagickFalse;
5737  filteredImage = NULL;
5738  filteredImage_view = NULL;
5739  clEnv = NULL;
5740  context = NULL;
5741  imageBuffer = NULL;
5742  tempImageBuffer = NULL;
5743  filteredImageBuffer = NULL;
5744  cubicCoefficientsBuffer = NULL;
5745  queue = NULL;
5746
5747  clEnv = GetDefaultOpenCLEnv();
5748  context = GetOpenCLContext(clEnv);
5749
5750  /* Create and initialize OpenCL buffers. */
5751  image_view=AcquireVirtualCacheView(image,exception);
5752  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
5753  if (inputPixels == (const void *) NULL)
5754  {
5755    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
5756    goto cleanup;
5757  }
5758
5759  /* If the host pointer is aligned to the size of CLPixelPacket,
5760     then use the host buffer directly from the GPU; otherwise,
5761     create a buffer on the GPU and copy the data over */
5762  if (ALIGNED(inputPixels,CLPixelPacket))
5763  {
5764    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
5765  }
5766  else
5767  {
5768    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
5769  }
5770  /* create a CL buffer from image pixel buffer */
5771  length = image->columns * image->rows;
5772  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
5773  if (clStatus != CL_SUCCESS)
5774  {
5775    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5776    goto cleanup;
5777  }
5778
5779  cubicCoefficientsBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, 7 * sizeof(float), NULL, &clStatus);
5780  if (clStatus != CL_SUCCESS)
5781  {
5782    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5783    goto cleanup;
5784  }
5785  queue = AcquireOpenCLCommandQueue(clEnv);
5786  mappedCoefficientBuffer = (float*)clEnv->library->clEnqueueMapBuffer(queue, cubicCoefficientsBuffer, CL_TRUE, CL_MAP_WRITE, 0, 7 * sizeof(float)
5787          , 0, NULL, NULL, &clStatus);
5788  if (clStatus != CL_SUCCESS)
5789  {
5790    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
5791    goto cleanup;
5792  }
5793  resizeFilterCoefficient = GetResizeFilterCoefficient(resizeFilter);
5794  for (i = 0; i < 7; i++)
5795  {
5796    mappedCoefficientBuffer[i] = (float) resizeFilterCoefficient[i];
5797  }
5798  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, cubicCoefficientsBuffer, mappedCoefficientBuffer, 0, NULL, NULL);
5799  if (clStatus != CL_SUCCESS)
5800  {
5801    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
5802    goto cleanup;
5803  }
5804
5805  filteredImage = CloneImage(image,resizedColumns,resizedRows,MagickTrue,exception);
5806  if (filteredImage == NULL)
5807    goto cleanup;
5808
5809  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
5810  {
5811    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
5812    goto cleanup;
5813  }
5814  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
5815  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
5816  if (filteredPixels == (void *) NULL)
5817  {
5818    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
5819    goto cleanup;
5820  }
5821
5822  if (ALIGNED(filteredPixels,CLPixelPacket))
5823  {
5824    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
5825    hostPtr = filteredPixels;
5826  }
5827  else
5828  {
5829    mem_flags = CL_MEM_WRITE_ONLY;
5830    hostPtr = NULL;
5831  }
5832
5833  /* create a CL buffer from image pixel buffer */
5834  length = filteredImage->columns * filteredImage->rows;
5835  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
5836  if (clStatus != CL_SUCCESS)
5837  {
5838    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5839    goto cleanup;
5840  }
5841
5842  xFactor=(float) resizedColumns/(float) image->columns;
5843  yFactor=(float) resizedRows/(float) image->rows;
5844  matte=(image->alpha_trait > CopyPixelTrait)?1:0;
5845  if (xFactor > yFactor)
5846  {
5847
5848    length = resizedColumns*image->rows;
5849    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
5850    if (clStatus != CL_SUCCESS)
5851    {
5852      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5853      goto cleanup;
5854    }
5855
5856    status = resizeHorizontalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte
5857          , tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows
5858          , resizeFilter, cubicCoefficientsBuffer
5859          , xFactor, clEnv, queue, exception);
5860    if (status != MagickTrue)
5861      goto cleanup;
5862
5863    status = resizeVerticalFilter(tempImageBuffer, (unsigned int) resizedColumns, (unsigned int) image->rows, matte
5864       , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
5865       , resizeFilter, cubicCoefficientsBuffer
5866       , yFactor, clEnv, queue, exception);
5867    if (status != MagickTrue)
5868      goto cleanup;
5869  }
5870  else
5871  {
5872    length = image->columns*resizedRows;
5873    tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length*sizeof(CLPixelPacket), NULL, &clStatus);
5874    if (clStatus != CL_SUCCESS)
5875    {
5876      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
5877      goto cleanup;
5878    }
5879
5880    status = resizeVerticalFilter(imageBuffer, (unsigned int) image->columns, (unsigned int) image->rows, matte
5881       , tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows
5882       , resizeFilter, cubicCoefficientsBuffer
5883       , yFactor, clEnv, queue, exception);
5884    if (status != MagickTrue)
5885      goto cleanup;
5886
5887    status = resizeHorizontalFilter(tempImageBuffer, (unsigned int) image->columns, (unsigned int) resizedRows, matte
5888       , filteredImageBuffer, (unsigned int) resizedColumns, (unsigned int) resizedRows
5889       , resizeFilter, cubicCoefficientsBuffer
5890       , xFactor, clEnv, queue, exception);
5891    if (status != MagickTrue)
5892      goto cleanup;
5893  }
5894  length = resizedColumns*resizedRows;
5895  if (ALIGNED(filteredPixels,CLPixelPacket))
5896  {
5897    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
5898  }
5899  else
5900  {
5901    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
5902  }
5903  if (clStatus != CL_SUCCESS)
5904  {
5905    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
5906    goto cleanup;
5907  }
5908  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
5909
5910cleanup:
5911  OpenCLLogException(__FUNCTION__,__LINE__,exception);
5912
5913  image_view=DestroyCacheView(image_view);
5914  if (filteredImage_view != NULL)
5915    filteredImage_view=DestroyCacheView(filteredImage_view);
5916
5917  if (imageBuffer!=NULL)		  clEnv->library->clReleaseMemObject(imageBuffer);
5918  if (tempImageBuffer!=NULL)		  clEnv->library->clReleaseMemObject(tempImageBuffer);
5919  if (filteredImageBuffer!=NULL)	  clEnv->library->clReleaseMemObject(filteredImageBuffer);
5920  if (cubicCoefficientsBuffer!=NULL)      clEnv->library->clReleaseMemObject(cubicCoefficientsBuffer);
5921  if (queue != NULL)  	                  RelinquishOpenCLCommandQueue(clEnv, queue);
5922  if (outputReady == MagickFalse && filteredImage != NULL)
5923    filteredImage=DestroyImage(filteredImage);
5924  return(filteredImage);
5925}
5926
5927static MagickBooleanType gpuSupportedResizeWeighting(
5928  ResizeWeightingFunctionType f)
5929{
5930  unsigned int
5931    i;
5932
5933  for (i = 0; ;i++)
5934  {
5935    if (supportedResizeWeighting[i] == LastWeightingFunction)
5936      break;
5937    if (supportedResizeWeighting[i] == f)
5938      return(MagickTrue);
5939  }
5940  return(MagickFalse);
5941}
5942
5943MagickExport Image *AccelerateResizeImage(const Image *image,
5944  const size_t resizedColumns,const size_t resizedRows,
5945  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
5946{
5947  Image
5948    *filteredImage;
5949
5950  assert(image != NULL);
5951  assert(exception != (ExceptionInfo *) NULL);
5952
5953  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
5954      (checkAccelerateCondition(image, AllChannels) == MagickFalse))
5955    return NULL;
5956
5957  if (gpuSupportedResizeWeighting(GetResizeFilterWeightingType(resizeFilter)) == MagickFalse ||
5958      gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(resizeFilter)) == MagickFalse)
5959    return NULL;
5960
5961  filteredImage=ComputeResizeImage(image,resizedColumns,resizedRows,resizeFilter,exception);
5962  return(filteredImage);
5963}
5964
5965/*
5966%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5967%                                                                             %
5968%                                                                             %
5969%                                                                             %
5970%     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
5971%                                                                             %
5972%                                                                             %
5973%                                                                             %
5974%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
5975*/
5976
5977static Image* ComputeRotationalBlurImage(const Image *image,
5978  const ChannelType channel,const double angle,ExceptionInfo *exception)
5979{
5980  CacheView
5981    *image_view,
5982    *filteredImage_view;
5983
5984  cl_command_queue
5985    queue;
5986
5987  cl_context
5988    context;
5989
5990  cl_float2
5991    blurCenter;
5992
5993  cl_float4
5994    biasPixel;
5995
5996  cl_int
5997    clStatus;
5998
5999  cl_mem
6000    cosThetaBuffer,
6001    filteredImageBuffer,
6002    imageBuffer,
6003    sinThetaBuffer;
6004
6005  cl_mem_flags
6006    mem_flags;
6007
6008  cl_kernel
6009    rotationalBlurKernel;
6010
6011  cl_event
6012    event;
6013
6014  const void
6015    *inputPixels;
6016
6017  float
6018    blurRadius,
6019    *cosThetaPtr,
6020    offset,
6021    *sinThetaPtr,
6022    theta;
6023
6024  Image
6025    *filteredImage;
6026
6027  MagickBooleanType
6028    outputReady;
6029
6030  MagickCLEnv
6031    clEnv;
6032
6033  PixelInfo
6034    bias;
6035
6036  MagickSizeType
6037    length;
6038
6039  size_t
6040    global_work_size[2];
6041
6042  unsigned int
6043    cossin_theta_size,
6044    i,
6045    matte;
6046
6047  void
6048    *filteredPixels,
6049    *hostPtr;
6050
6051  outputReady = MagickFalse;
6052  context = NULL;
6053  filteredImage = NULL;
6054  filteredImage_view = NULL;
6055  imageBuffer = NULL;
6056  filteredImageBuffer = NULL;
6057  sinThetaBuffer = NULL;
6058  cosThetaBuffer = NULL;
6059  queue = NULL;
6060  rotationalBlurKernel = NULL;
6061
6062
6063  clEnv = GetDefaultOpenCLEnv();
6064  context = GetOpenCLContext(clEnv);
6065
6066
6067  /* Create and initialize OpenCL buffers. */
6068
6069  image_view=AcquireVirtualCacheView(image,exception);
6070  inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
6071  if (inputPixels == (const void *) NULL)
6072  {
6073    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
6074    goto cleanup;
6075  }
6076
6077  /* If the host pointer is aligned to the size of CLPixelPacket,
6078     then use the host buffer directly from the GPU; otherwise,
6079     create a buffer on the GPU and copy the data over */
6080  if (ALIGNED(inputPixels,CLPixelPacket))
6081  {
6082    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
6083  }
6084  else
6085  {
6086    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
6087  }
6088  /* create a CL buffer from image pixel buffer */
6089  length = image->columns * image->rows;
6090  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
6091  if (clStatus != CL_SUCCESS)
6092  {
6093    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6094    goto cleanup;
6095  }
6096
6097
6098  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
6099  assert(filteredImage != NULL);
6100  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
6101  {
6102    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
6103    goto cleanup;
6104  }
6105  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
6106  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
6107  if (filteredPixels == (void *) NULL)
6108  {
6109    (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
6110    goto cleanup;
6111  }
6112
6113  if (ALIGNED(filteredPixels,CLPixelPacket))
6114  {
6115    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
6116    hostPtr = filteredPixels;
6117  }
6118  else
6119  {
6120    mem_flags = CL_MEM_WRITE_ONLY;
6121    hostPtr = NULL;
6122  }
6123  /* create a CL buffer from image pixel buffer */
6124  length = image->columns * image->rows;
6125  filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
6126  if (clStatus != CL_SUCCESS)
6127  {
6128    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6129    goto cleanup;
6130  }
6131
6132  blurCenter.s[0] = (float) (image->columns-1)/2.0;
6133  blurCenter.s[1] = (float) (image->rows-1)/2.0;
6134  blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
6135  cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
6136
6137  /* create a buffer for sin_theta and cos_theta */
6138  sinThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
6139  if (clStatus != CL_SUCCESS)
6140  {
6141    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6142    goto cleanup;
6143  }
6144  cosThetaBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, cossin_theta_size * sizeof(float), NULL, &clStatus);
6145  if (clStatus != CL_SUCCESS)
6146  {
6147    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6148    goto cleanup;
6149  }
6150
6151
6152  queue = AcquireOpenCLCommandQueue(clEnv);
6153  sinThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, sinThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
6154  if (clStatus != CL_SUCCESS)
6155  {
6156    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
6157    goto cleanup;
6158  }
6159
6160  cosThetaPtr = (float*) clEnv->library->clEnqueueMapBuffer(queue, cosThetaBuffer, CL_TRUE, CL_MAP_WRITE, 0, cossin_theta_size*sizeof(float), 0, NULL, NULL, &clStatus);
6161  if (clStatus != CL_SUCCESS)
6162  {
6163    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnqueuemapBuffer failed.",".");
6164    goto cleanup;
6165  }
6166
6167  theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
6168  offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
6169  for (i=0; i < (ssize_t) cossin_theta_size; i++)
6170  {
6171    cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
6172    sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
6173  }
6174
6175  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, sinThetaBuffer, sinThetaPtr, 0, NULL, NULL);
6176  clStatus |= clEnv->library->clEnqueueUnmapMemObject(queue, cosThetaBuffer, cosThetaPtr, 0, NULL, NULL);
6177  if (clStatus != CL_SUCCESS)
6178  {
6179    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
6180    goto cleanup;
6181  }
6182
6183  /* get the OpenCL kernel */
6184  rotationalBlurKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "RotationalBlur");
6185  if (rotationalBlurKernel == NULL)
6186  {
6187    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
6188    goto cleanup;
6189  }
6190
6191
6192  /* set the kernel arguments */
6193  i = 0;
6194  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
6195  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
6196
6197  GetPixelInfo(image,&bias);
6198  biasPixel.s[0] = bias.red;
6199  biasPixel.s[1] = bias.green;
6200  biasPixel.s[2] = bias.blue;
6201  biasPixel.s[3] = bias.alpha;
6202  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float4), &biasPixel);
6203  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &channel);
6204
6205  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
6206  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &matte);
6207
6208  clStatus=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
6209
6210  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
6211  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
6212  clStatus|=clEnv->library->clSetKernelArg(rotationalBlurKernel,i++,sizeof(unsigned int), &cossin_theta_size);
6213  if (clStatus != CL_SUCCESS)
6214  {
6215    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6216    goto cleanup;
6217  }
6218
6219
6220  global_work_size[0] = image->columns;
6221  global_work_size[1] = image->rows;
6222  /* launch the kernel */
6223  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, rotationalBlurKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
6224  if (clStatus != CL_SUCCESS)
6225  {
6226    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6227    goto cleanup;
6228  }
6229  clEnv->library->clFlush(queue);
6230  RecordProfileData(clEnv,RotationalBlurKernel,event);
6231  clEnv->library->clReleaseEvent(event);
6232
6233  if (ALIGNED(filteredPixels,CLPixelPacket))
6234  {
6235    length = image->columns * image->rows;
6236    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
6237  }
6238  else
6239  {
6240    length = image->columns * image->rows;
6241    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
6242  }
6243  if (clStatus != CL_SUCCESS)
6244  {
6245    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
6246    goto cleanup;
6247  }
6248  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
6249
6250cleanup:
6251  OpenCLLogException(__FUNCTION__,__LINE__,exception);
6252
6253  image_view=DestroyCacheView(image_view);
6254  if (filteredImage_view != NULL)
6255    filteredImage_view=DestroyCacheView(filteredImage_view);
6256
6257  if (filteredImageBuffer!=NULL)  clEnv->library->clReleaseMemObject(filteredImageBuffer);
6258  if (imageBuffer!=NULL)     clEnv->library->clReleaseMemObject(imageBuffer);
6259  if (sinThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(sinThetaBuffer);
6260  if (cosThetaBuffer!=NULL)       clEnv->library->clReleaseMemObject(cosThetaBuffer);
6261  if (rotationalBlurKernel!=NULL) RelinquishOpenCLKernel(clEnv, rotationalBlurKernel);
6262  if (queue != NULL)              RelinquishOpenCLCommandQueue(clEnv, queue);
6263  if (outputReady == MagickFalse)
6264  {
6265    if (filteredImage != NULL)
6266    {
6267      DestroyImage(filteredImage);
6268      filteredImage = NULL;
6269    }
6270  }
6271  return filteredImage;
6272}
6273
6274MagickExport Image* AccelerateRotationalBlurImage(const Image *image,
6275  const ChannelType channel,const double angle,ExceptionInfo *exception)
6276{
6277  Image
6278    *filteredImage;
6279
6280  assert(image != NULL);
6281  assert(exception != (ExceptionInfo *) NULL);
6282
6283  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
6284      (checkAccelerateCondition(image, channel) == MagickFalse))
6285    return NULL;
6286
6287  filteredImage=ComputeRotationalBlurImage(image, channel, angle, exception);
6288  return filteredImage;
6289}
6290
6291/*
6292%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6293%                                                                             %
6294%                                                                             %
6295%                                                                             %
6296%     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
6297%                                                                             %
6298%                                                                             %
6299%                                                                             %
6300%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
6301*/
6302
6303static Image *ComputeUnsharpMaskImage(const Image *image,
6304  const ChannelType channel,const double radius,const double sigma,
6305  const double gain,const double threshold,ExceptionInfo *exception)
6306{
6307  CacheView
6308    *filteredImage_view,
6309    *image_view;
6310
6311  char
6312    geometry[MagickPathExtent];
6313
6314  cl_command_queue
6315    queue;
6316
6317  cl_context
6318    context;
6319
6320  cl_int
6321    clStatus;
6322
6323  cl_kernel
6324    blurRowKernel,
6325    unsharpMaskBlurColumnKernel;
6326
6327  cl_event
6328    event;
6329
6330  cl_mem
6331    filteredImageBuffer,
6332    imageBuffer,
6333    imageKernelBuffer,
6334    tempImageBuffer;
6335
6336  cl_mem_flags
6337    mem_flags;
6338
6339  const void
6340    *inputPixels;
6341
6342  float
6343    fGain,
6344    fThreshold,
6345    *kernelBufferPtr;
6346
6347  Image
6348    *filteredImage;
6349
6350  int
6351    chunkSize;
6352
6353  KernelInfo
6354    *kernel;
6355
6356  MagickBooleanType
6357    outputReady;
6358
6359  MagickCLEnv
6360    clEnv;
6361
6362  MagickSizeType
6363    length;
6364
6365  void
6366    *filteredPixels,
6367    *hostPtr;
6368
6369  unsigned int
6370    i,
6371    imageColumns,
6372    imageRows,
6373    kernelWidth;
6374
6375  clEnv = NULL;
6376  filteredImage = NULL;
6377  filteredImage_view = NULL;
6378  kernel = NULL;
6379  context = NULL;
6380  imageBuffer = NULL;
6381  filteredImageBuffer = NULL;
6382  tempImageBuffer = NULL;
6383  imageKernelBuffer = NULL;
6384  blurRowKernel = NULL;
6385  unsharpMaskBlurColumnKernel = NULL;
6386  queue = NULL;
6387  outputReady = MagickFalse;
6388
6389  clEnv = GetDefaultOpenCLEnv();
6390  context = GetOpenCLContext(clEnv);
6391  queue = AcquireOpenCLCommandQueue(clEnv);
6392
6393  /* Create and initialize OpenCL buffers. */
6394  {
6395    image_view=AcquireVirtualCacheView(image,exception);
6396    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
6397    if (inputPixels == (const void *) NULL)
6398    {
6399      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
6400      goto cleanup;
6401    }
6402
6403    /* If the host pointer is aligned to the size of CLPixelPacket,
6404     then use the host buffer directly from the GPU; otherwise,
6405     create a buffer on the GPU and copy the data over */
6406    if (ALIGNED(inputPixels,CLPixelPacket))
6407    {
6408      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
6409    }
6410    else
6411    {
6412      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
6413    }
6414    /* create a CL buffer from image pixel buffer */
6415    length = image->columns * image->rows;
6416    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
6417    if (clStatus != CL_SUCCESS)
6418    {
6419      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6420      goto cleanup;
6421    }
6422  }
6423
6424  /* create output */
6425  {
6426    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
6427    assert(filteredImage != NULL);
6428    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
6429    {
6430      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
6431      goto cleanup;
6432    }
6433    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
6434    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
6435    if (filteredPixels == (void *) NULL)
6436    {
6437      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
6438      goto cleanup;
6439    }
6440
6441    if (ALIGNED(filteredPixels,CLPixelPacket))
6442    {
6443      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
6444      hostPtr = filteredPixels;
6445    }
6446    else
6447    {
6448      mem_flags = CL_MEM_WRITE_ONLY;
6449      hostPtr = NULL;
6450    }
6451
6452    /* create a CL buffer from image pixel buffer */
6453    length = image->columns * image->rows;
6454    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
6455    if (clStatus != CL_SUCCESS)
6456    {
6457      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6458      goto cleanup;
6459    }
6460  }
6461
6462  /* create the blur kernel */
6463  {
6464    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
6465    kernel=AcquireKernelInfo(geometry,exception);
6466    if (kernel == (KernelInfo *) NULL)
6467    {
6468      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
6469      goto cleanup;
6470    }
6471
6472    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
6473    if (clStatus != CL_SUCCESS)
6474    {
6475      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6476      goto cleanup;
6477    }
6478
6479
6480    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
6481    if (clStatus != CL_SUCCESS)
6482    {
6483      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
6484      goto cleanup;
6485    }
6486    for (i = 0; i < kernel->width; i++)
6487    {
6488      kernelBufferPtr[i] = (float) kernel->values[i];
6489    }
6490    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
6491    if (clStatus != CL_SUCCESS)
6492    {
6493      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
6494      goto cleanup;
6495    }
6496  }
6497
6498  {
6499    /* create temp buffer */
6500    {
6501      length = image->columns * image->rows;
6502      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
6503      if (clStatus != CL_SUCCESS)
6504      {
6505        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6506        goto cleanup;
6507      }
6508    }
6509
6510    /* get the opencl kernel */
6511    {
6512      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRow");
6513      if (blurRowKernel == NULL)
6514      {
6515        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
6516        goto cleanup;
6517      };
6518
6519      unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumn");
6520      if (unsharpMaskBlurColumnKernel == NULL)
6521      {
6522        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
6523        goto cleanup;
6524      };
6525    }
6526
6527    {
6528      chunkSize = 256;
6529
6530      imageColumns = (unsigned int) image->columns;
6531      imageRows = (unsigned int) image->rows;
6532
6533      kernelWidth = (unsigned int) kernel->width;
6534
6535      /* set the kernel arguments */
6536      i = 0;
6537      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
6538      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
6539      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
6540      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
6541      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
6542      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
6543      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
6544      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
6545      if (clStatus != CL_SUCCESS)
6546      {
6547        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6548        goto cleanup;
6549      }
6550    }
6551
6552    /* launch the kernel */
6553    {
6554      size_t gsize[2];
6555      size_t wsize[2];
6556
6557      gsize[0] = chunkSize*((image->columns+chunkSize-1)/chunkSize);
6558      gsize[1] = image->rows;
6559      wsize[0] = chunkSize;
6560      wsize[1] = 1;
6561
6562	  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
6563      if (clStatus != CL_SUCCESS)
6564      {
6565        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6566        goto cleanup;
6567      }
6568      clEnv->library->clFlush(queue);
6569      RecordProfileData(clEnv,BlurRowKernel,event);
6570      clEnv->library->clReleaseEvent(event);
6571    }
6572
6573
6574    {
6575      chunkSize = 256;
6576      imageColumns = (unsigned int) image->columns;
6577      imageRows = (unsigned int) image->rows;
6578      kernelWidth = (unsigned int) kernel->width;
6579      fGain = (float) gain;
6580      fThreshold = (float) threshold;
6581
6582      i = 0;
6583      clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
6584      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
6585      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
6586      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
6587      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
6588      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
6589      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
6590      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
6591      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
6592      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
6593      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
6594      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
6595
6596      if (clStatus != CL_SUCCESS)
6597      {
6598        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6599        goto cleanup;
6600      }
6601    }
6602
6603    /* launch the kernel */
6604    {
6605      size_t gsize[2];
6606      size_t wsize[2];
6607
6608      gsize[0] = image->columns;
6609      gsize[1] = chunkSize*((image->rows+chunkSize-1)/chunkSize);
6610      wsize[0] = 1;
6611      wsize[1] = chunkSize;
6612
6613	  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
6614      if (clStatus != CL_SUCCESS)
6615      {
6616        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6617        goto cleanup;
6618      }
6619      clEnv->library->clFlush(queue);
6620      RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event);
6621      clEnv->library->clReleaseEvent(event);
6622    }
6623
6624  }
6625
6626  /* get result */
6627  if (ALIGNED(filteredPixels,CLPixelPacket))
6628  {
6629    length = image->columns * image->rows;
6630    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
6631  }
6632  else
6633  {
6634    length = image->columns * image->rows;
6635    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
6636  }
6637  if (clStatus != CL_SUCCESS)
6638  {
6639    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
6640    goto cleanup;
6641  }
6642
6643  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
6644
6645cleanup:
6646  OpenCLLogException(__FUNCTION__,__LINE__,exception);
6647
6648  image_view=DestroyCacheView(image_view);
6649  if (filteredImage_view != NULL)
6650    filteredImage_view=DestroyCacheView(filteredImage_view);
6651
6652  if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
6653  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
6654  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
6655  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
6656  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
6657  if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
6658  if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
6659  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
6660  if (outputReady == MagickFalse)
6661  {
6662    if (filteredImage != NULL)
6663    {
6664      DestroyImage(filteredImage);
6665      filteredImage = NULL;
6666    }
6667  }
6668  return(filteredImage);
6669}
6670
6671static Image *ComputeUnsharpMaskImageSection(const Image *image,
6672  const ChannelType channel,const double radius,const double sigma,
6673  const double gain,const double threshold,ExceptionInfo *exception)
6674{
6675  CacheView
6676    *filteredImage_view,
6677    *image_view;
6678
6679  char
6680    geometry[MagickPathExtent];
6681
6682  cl_command_queue
6683    queue;
6684
6685  cl_context
6686    context;
6687
6688  cl_int
6689    clStatus;
6690
6691  cl_kernel
6692    blurRowKernel,
6693    unsharpMaskBlurColumnKernel;
6694
6695  cl_event
6696    event;
6697
6698  cl_mem
6699    filteredImageBuffer,
6700    imageBuffer,
6701    imageKernelBuffer,
6702    tempImageBuffer;
6703
6704  cl_mem_flags
6705    mem_flags;
6706
6707  const void
6708    *inputPixels;
6709
6710  float
6711    fGain,
6712    fThreshold,
6713    *kernelBufferPtr;
6714
6715  Image
6716    *filteredImage;
6717
6718  int
6719    chunkSize;
6720
6721  KernelInfo
6722    *kernel;
6723
6724  MagickBooleanType
6725    outputReady;
6726
6727  MagickCLEnv
6728    clEnv;
6729
6730  MagickSizeType
6731    length;
6732
6733  void
6734    *filteredPixels,
6735    *hostPtr;
6736
6737  unsigned int
6738    i,
6739    imageColumns,
6740    imageRows,
6741    kernelWidth;
6742
6743  clEnv = NULL;
6744  filteredImage = NULL;
6745  filteredImage_view = NULL;
6746  kernel = NULL;
6747  context = NULL;
6748  imageBuffer = NULL;
6749  filteredImageBuffer = NULL;
6750  tempImageBuffer = NULL;
6751  imageKernelBuffer = NULL;
6752  blurRowKernel = NULL;
6753  unsharpMaskBlurColumnKernel = NULL;
6754  queue = NULL;
6755  outputReady = MagickFalse;
6756
6757  clEnv = GetDefaultOpenCLEnv();
6758  context = GetOpenCLContext(clEnv);
6759  queue = AcquireOpenCLCommandQueue(clEnv);
6760
6761  /* Create and initialize OpenCL buffers. */
6762  {
6763    image_view=AcquireVirtualCacheView(image,exception);
6764    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
6765    if (inputPixels == (const void *) NULL)
6766    {
6767      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
6768      goto cleanup;
6769    }
6770
6771    /* If the host pointer is aligned to the size of CLPixelPacket,
6772     then use the host buffer directly from the GPU; otherwise,
6773     create a buffer on the GPU and copy the data over */
6774    if (ALIGNED(inputPixels,CLPixelPacket))
6775    {
6776      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
6777    }
6778    else
6779    {
6780      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
6781    }
6782    /* create a CL buffer from image pixel buffer */
6783    length = image->columns * image->rows;
6784    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
6785    if (clStatus != CL_SUCCESS)
6786    {
6787      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6788      goto cleanup;
6789    }
6790  }
6791
6792  /* create output */
6793  {
6794    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
6795    assert(filteredImage != NULL);
6796    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
6797    {
6798      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
6799      goto cleanup;
6800    }
6801    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
6802    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
6803    if (filteredPixels == (void *) NULL)
6804    {
6805      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
6806      goto cleanup;
6807    }
6808
6809    if (ALIGNED(filteredPixels,CLPixelPacket))
6810    {
6811      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
6812      hostPtr = filteredPixels;
6813    }
6814    else
6815    {
6816      mem_flags = CL_MEM_WRITE_ONLY;
6817      hostPtr = NULL;
6818    }
6819
6820    /* create a CL buffer from image pixel buffer */
6821    length = image->columns * image->rows;
6822    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
6823    if (clStatus != CL_SUCCESS)
6824    {
6825      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6826      goto cleanup;
6827    }
6828  }
6829
6830  /* create the blur kernel */
6831  {
6832    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
6833    kernel=AcquireKernelInfo(geometry,exception);
6834    if (kernel == (KernelInfo *) NULL)
6835    {
6836      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
6837      goto cleanup;
6838    }
6839
6840    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
6841    if (clStatus != CL_SUCCESS)
6842    {
6843      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6844      goto cleanup;
6845    }
6846
6847
6848    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
6849    if (clStatus != CL_SUCCESS)
6850    {
6851      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
6852      goto cleanup;
6853    }
6854    for (i = 0; i < kernel->width; i++)
6855    {
6856      kernelBufferPtr[i] = (float) kernel->values[i];
6857    }
6858    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
6859    if (clStatus != CL_SUCCESS)
6860    {
6861      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
6862      goto cleanup;
6863    }
6864  }
6865
6866  {
6867    unsigned int offsetRows;
6868    unsigned int sec;
6869
6870    /* create temp buffer */
6871    {
6872      length = image->columns * (image->rows / 2 + 1 + (kernel->width-1) / 2);
6873      tempImageBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_WRITE, length * 4 * sizeof(float), NULL, &clStatus);
6874      if (clStatus != CL_SUCCESS)
6875      {
6876        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
6877        goto cleanup;
6878      }
6879    }
6880
6881    /* get the opencl kernel */
6882    {
6883      blurRowKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "BlurRowSection");
6884      if (blurRowKernel == NULL)
6885      {
6886        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
6887        goto cleanup;
6888      };
6889
6890      unsharpMaskBlurColumnKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMaskBlurColumnSection");
6891      if (unsharpMaskBlurColumnKernel == NULL)
6892      {
6893        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
6894        goto cleanup;
6895      };
6896    }
6897
6898    for (sec = 0; sec < 2; sec++)
6899    {
6900      {
6901        chunkSize = 256;
6902
6903        imageColumns = (unsigned int) image->columns;
6904        if (sec == 0)
6905          imageRows = (unsigned int) (image->rows / 2 + (kernel->width-1) / 2);
6906        else
6907          imageRows = (unsigned int) ((image->rows - image->rows / 2) + (kernel->width-1) / 2);
6908
6909        offsetRows = (unsigned int) (sec * image->rows / 2);
6910
6911        kernelWidth = (unsigned int) kernel->width;
6912
6913        /* set the kernel arguments */
6914        i = 0;
6915        clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
6916        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
6917        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(ChannelType),&channel);
6918        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
6919        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
6920        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
6921        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
6922        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(CLPixelPacket)*(chunkSize+kernel->width),(void *) NULL);
6923        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
6924        clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&sec);
6925        if (clStatus != CL_SUCCESS)
6926        {
6927          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6928          goto cleanup;
6929        }
6930      }
6931      /* launch the kernel */
6932      {
6933        size_t gsize[2];
6934        size_t wsize[2];
6935
6936        gsize[0] = chunkSize*((imageColumns+chunkSize-1)/chunkSize);
6937        gsize[1] = imageRows;
6938        wsize[0] = chunkSize;
6939        wsize[1] = 1;
6940
6941		clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
6942        if (clStatus != CL_SUCCESS)
6943        {
6944          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
6945          goto cleanup;
6946        }
6947        clEnv->library->clFlush(queue);
6948        RecordProfileData(clEnv,BlurRowKernel,event);
6949        clEnv->library->clReleaseEvent(event);
6950      }
6951
6952
6953      {
6954        chunkSize = 256;
6955
6956        imageColumns = (unsigned int) image->columns;
6957        if (sec == 0)
6958          imageRows = (unsigned int) (image->rows / 2);
6959        else
6960          imageRows = (unsigned int) (image->rows - image->rows / 2);
6961
6962        offsetRows = (unsigned int) (sec * image->rows / 2);
6963
6964        kernelWidth = (unsigned int) kernel->width;
6965
6966        fGain = (float) gain;
6967        fThreshold = (float) threshold;
6968
6969        i = 0;
6970        clStatus=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
6971        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
6972        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
6973        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
6974        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
6975        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, (chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
6976        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++, kernelWidth*sizeof(float),NULL);
6977        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&channel);
6978        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
6979        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
6980        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
6981        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
6982        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&offsetRows);
6983        clStatus|=clEnv->library->clSetKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(unsigned int),(void *)&sec);
6984
6985        if (clStatus != CL_SUCCESS)
6986        {
6987          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
6988          goto cleanup;
6989        }
6990      }
6991
6992      /* launch the kernel */
6993      {
6994        size_t gsize[2];
6995        size_t wsize[2];
6996
6997        gsize[0] = imageColumns;
6998        gsize[1] = chunkSize*((imageRows+chunkSize-1)/chunkSize);
6999        wsize[0] = 1;
7000        wsize[1] = chunkSize;
7001
7002		clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskBlurColumnKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
7003        if (clStatus != CL_SUCCESS)
7004        {
7005          (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
7006          goto cleanup;
7007        }
7008        clEnv->library->clFlush(queue);
7009        RecordProfileData(clEnv,UnsharpMaskBlurColumnKernel,event);
7010        clEnv->library->clReleaseEvent(event);
7011      }
7012    }
7013  }
7014
7015  /* get result */
7016  if (ALIGNED(filteredPixels,CLPixelPacket))
7017  {
7018    length = image->columns * image->rows;
7019    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
7020  }
7021  else
7022  {
7023    length = image->columns * image->rows;
7024    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
7025  }
7026  if (clStatus != CL_SUCCESS)
7027  {
7028    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
7029    goto cleanup;
7030  }
7031
7032  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
7033
7034cleanup:
7035  OpenCLLogException(__FUNCTION__,__LINE__,exception);
7036
7037  image_view=DestroyCacheView(image_view);
7038  if (filteredImage_view != NULL)
7039    filteredImage_view=DestroyCacheView(filteredImage_view);
7040
7041  if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
7042  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
7043  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
7044  if (tempImageBuffer!=NULL)                  clEnv->library->clReleaseMemObject(tempImageBuffer);
7045  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
7046  if (blurRowKernel!=NULL)                    RelinquishOpenCLKernel(clEnv, blurRowKernel);
7047  if (unsharpMaskBlurColumnKernel!=NULL)      RelinquishOpenCLKernel(clEnv, unsharpMaskBlurColumnKernel);
7048  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
7049  if (outputReady == MagickFalse)
7050  {
7051    if (filteredImage != NULL)
7052    {
7053      DestroyImage(filteredImage);
7054      filteredImage = NULL;
7055    }
7056  }
7057  return filteredImage;
7058}
7059
7060static Image *ComputeUnsharpMaskImageSingle(const Image *image,
7061  const ChannelType channel,const double radius,const double sigma,
7062  const double gain,const double threshold,int blurOnly, ExceptionInfo *exception)
7063{
7064  CacheView
7065    *filteredImage_view,
7066    *image_view;
7067
7068  char
7069    geometry[MagickPathExtent];
7070
7071  cl_command_queue
7072    queue;
7073
7074  cl_context
7075    context;
7076
7077  cl_int
7078    justBlur,
7079    clStatus;
7080
7081  cl_kernel
7082    unsharpMaskKernel;
7083
7084  cl_event
7085    event;
7086
7087  cl_mem
7088    filteredImageBuffer,
7089    imageBuffer,
7090    imageKernelBuffer;
7091
7092  cl_mem_flags
7093    mem_flags;
7094
7095  const void
7096    *inputPixels;
7097
7098  float
7099    fGain,
7100    fThreshold,
7101    *kernelBufferPtr;
7102
7103  Image
7104    *filteredImage;
7105
7106  KernelInfo
7107    *kernel;
7108
7109  MagickBooleanType
7110    outputReady;
7111
7112  MagickCLEnv
7113    clEnv;
7114
7115  MagickSizeType
7116    length;
7117
7118  void
7119    *filteredPixels,
7120    *hostPtr;
7121
7122  unsigned int
7123    i,
7124    imageColumns,
7125    imageRows,
7126    kernelWidth;
7127
7128  clEnv = NULL;
7129  filteredImage = NULL;
7130  filteredImage_view = NULL;
7131  kernel = NULL;
7132  context = NULL;
7133  imageBuffer = NULL;
7134  filteredImageBuffer = NULL;
7135  imageKernelBuffer = NULL;
7136  unsharpMaskKernel = NULL;
7137  queue = NULL;
7138  outputReady = MagickFalse;
7139
7140  clEnv = GetDefaultOpenCLEnv();
7141  context = GetOpenCLContext(clEnv);
7142  queue = AcquireOpenCLCommandQueue(clEnv);
7143
7144  /* Create and initialize OpenCL buffers. */
7145  {
7146    image_view=AcquireVirtualCacheView(image,exception);
7147    inputPixels=GetCacheViewVirtualPixels(image_view,0,0,image->columns,image->rows,exception);
7148    if (inputPixels == (const void *) NULL)
7149    {
7150      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
7151      goto cleanup;
7152    }
7153
7154    /* If the host pointer is aligned to the size of CLPixelPacket,
7155     then use the host buffer directly from the GPU; otherwise,
7156     create a buffer on the GPU and copy the data over */
7157    if (ALIGNED(inputPixels,CLPixelPacket))
7158    {
7159      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
7160    }
7161    else
7162    {
7163      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
7164    }
7165    /* create a CL buffer from image pixel buffer */
7166    length = image->columns * image->rows;
7167    imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
7168    if (clStatus != CL_SUCCESS)
7169    {
7170      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
7171      goto cleanup;
7172    }
7173  }
7174
7175  /* create output */
7176  {
7177    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
7178    assert(filteredImage != NULL);
7179    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
7180    {
7181      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", "'%s'", ".");
7182      goto cleanup;
7183    }
7184    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
7185    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
7186    if (filteredPixels == (void *) NULL)
7187    {
7188      (void) OpenCLThrowMagickException(exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
7189      goto cleanup;
7190    }
7191
7192    if (ALIGNED(filteredPixels,CLPixelPacket))
7193    {
7194      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
7195      hostPtr = filteredPixels;
7196    }
7197    else
7198    {
7199      mem_flags = CL_MEM_WRITE_ONLY;
7200      hostPtr = NULL;
7201    }
7202
7203    /* create a CL buffer from image pixel buffer */
7204    length = image->columns * image->rows;
7205    filteredImageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
7206    if (clStatus != CL_SUCCESS)
7207    {
7208      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
7209      goto cleanup;
7210    }
7211  }
7212
7213  /* create the blur kernel */
7214  {
7215    (void) FormatLocaleString(geometry,MagickPathExtent,"blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
7216    kernel=AcquireKernelInfo(geometry,exception);
7217    if (kernel == (KernelInfo *) NULL)
7218    {
7219      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireKernelInfo failed.",".");
7220      goto cleanup;
7221    }
7222
7223    imageKernelBuffer = clEnv->library->clCreateBuffer(context, CL_MEM_READ_ONLY, kernel->width * sizeof(float), NULL, &clStatus);
7224    if (clStatus != CL_SUCCESS)
7225    {
7226      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
7227      goto cleanup;
7228    }
7229
7230
7231    kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer, CL_TRUE, CL_MAP_WRITE, 0, kernel->width * sizeof(float), 0, NULL, NULL, &clStatus);
7232    if (clStatus != CL_SUCCESS)
7233    {
7234      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
7235      goto cleanup;
7236    }
7237    for (i = 0; i < kernel->width; i++)
7238    {
7239      kernelBufferPtr[i] = (float) kernel->values[i];
7240    }
7241    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr, 0, NULL, NULL);
7242    if (clStatus != CL_SUCCESS)
7243    {
7244      (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", "'%s'", ".");
7245      goto cleanup;
7246    }
7247  }
7248
7249  {
7250    /* get the opencl kernel */
7251    {
7252      unsharpMaskKernel = AcquireOpenCLKernel(clEnv, MAGICK_OPENCL_ACCELERATE, "UnsharpMask");
7253      if (unsharpMaskKernel == NULL)
7254      {
7255        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", "'%s'", ".");
7256        goto cleanup;
7257      };
7258    }
7259
7260    {
7261      imageColumns = (unsigned int) image->columns;
7262      imageRows = (unsigned int) image->rows;
7263      kernelWidth = (unsigned int) kernel->width;
7264      fGain = (float) gain;
7265      fThreshold = (float) threshold;
7266      justBlur = blurOnly;
7267
7268      /* set the kernel arguments */
7269      i = 0;
7270      clStatus=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
7271      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
7272      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
7273      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&kernelWidth);
7274      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
7275      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(unsigned int),(void *)&imageRows);
7276      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernel->width)),(void *) NULL);
7277      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
7278      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
7279      clStatus|=clEnv->library->clSetKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&justBlur);
7280      if (clStatus != CL_SUCCESS)
7281      {
7282        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", "'%s'", ".");
7283        goto cleanup;
7284      }
7285    }
7286
7287    /* launch the kernel */
7288    {
7289      size_t gsize[2];
7290      size_t wsize[2];
7291
7292      gsize[0] = ((image->columns + 7) / 8) * 8;
7293      gsize[1] = ((image->rows + 31) / 32) * 32;
7294      wsize[0] = 8;
7295      wsize[1] = 32;
7296
7297	  clStatus = clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, unsharpMaskKernel, 2, NULL, gsize, wsize, 0, NULL, &event);
7298      if (clStatus != CL_SUCCESS)
7299      {
7300        (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", "'%s'", ".");
7301        goto cleanup;
7302      }
7303      clEnv->library->clFlush(queue);
7304      RecordProfileData(clEnv,UnsharpMaskKernel,event);
7305      clEnv->library->clReleaseEvent(event);
7306    }
7307  }
7308
7309  /* get result */
7310  if (ALIGNED(filteredPixels,CLPixelPacket))
7311  {
7312    length = image->columns * image->rows;
7313    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
7314  }
7315  else
7316  {
7317    length = image->columns * image->rows;
7318    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
7319  }
7320  if (clStatus != CL_SUCCESS)
7321  {
7322    (void) OpenCLThrowMagickException(exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", "'%s'", ".");
7323    goto cleanup;
7324  }
7325
7326  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
7327
7328cleanup:
7329  OpenCLLogException(__FUNCTION__,__LINE__,exception);
7330
7331  image_view=DestroyCacheView(image_view);
7332  if (filteredImage_view != NULL)
7333    filteredImage_view=DestroyCacheView(filteredImage_view);
7334
7335  if (kernel != NULL)			      kernel=DestroyKernelInfo(kernel);
7336  if (imageBuffer!=NULL)		      clEnv->library->clReleaseMemObject(imageBuffer);
7337  if (filteredImageBuffer!=NULL)              clEnv->library->clReleaseMemObject(filteredImageBuffer);
7338  if (imageKernelBuffer!=NULL)                clEnv->library->clReleaseMemObject(imageKernelBuffer);
7339  if (unsharpMaskKernel!=NULL)                RelinquishOpenCLKernel(clEnv, unsharpMaskKernel);
7340  if (queue != NULL)                          RelinquishOpenCLCommandQueue(clEnv, queue);
7341  if (outputReady == MagickFalse)
7342  {
7343    if (filteredImage != NULL)
7344    {
7345      DestroyImage(filteredImage);
7346      filteredImage = NULL;
7347    }
7348  }
7349  return(filteredImage);
7350}
7351
7352MagickExport Image *AccelerateUnsharpMaskImage(const Image *image,
7353  const ChannelType channel,const double radius,const double sigma,
7354  const double gain,const double threshold,ExceptionInfo *exception)
7355{
7356  Image
7357    *filteredImage;
7358
7359  assert(image != NULL);
7360  assert(exception != (ExceptionInfo *) NULL);
7361
7362  if ((checkOpenCLEnvironment(exception) == MagickFalse) ||
7363      (checkAccelerateCondition(image, channel) == MagickFalse))
7364    return NULL;
7365
7366  if (radius < 12.1)
7367    filteredImage = ComputeUnsharpMaskImageSingle(image,channel,radius,sigma,gain,threshold, 0, exception);
7368  else if (splitImage(image) && (image->rows / 2 > radius))
7369    filteredImage = ComputeUnsharpMaskImageSection(image,channel,radius,sigma,gain,threshold,exception);
7370  else
7371    filteredImage = ComputeUnsharpMaskImage(image,channel,radius,sigma,gain,threshold,exception);
7372  return(filteredImage);
7373}
7374
7375#else  /* MAGICKCORE_OPENCL_SUPPORT  */
7376
7377MagickExport Image *AccelerateAddNoiseImage(const Image *magick_unused(image),
7378  const ChannelType magick_unused(channel),
7379  const NoiseType magick_unused(noise_type),
7380  ExceptionInfo *magick_unused(exception))
7381{
7382  magick_unreferenced(image);
7383  magick_unreferenced(channel);
7384  magick_unreferenced(noise_type);
7385  magick_unreferenced(exception);
7386  return((Image *) NULL);
7387}
7388
7389MagickExport Image *AccelerateBlurImage(const Image *magick_unused(image),
7390  const ChannelType magick_unused(channel),const double magick_unused(radius),
7391  const double magick_unused(sigma),ExceptionInfo *magick_unused(exception))
7392{
7393  magick_unreferenced(image);
7394  magick_unreferenced(channel);
7395  magick_unreferenced(radius);
7396  magick_unreferenced(sigma);
7397  magick_unreferenced(exception);
7398
7399  return((Image *) NULL);
7400}
7401
7402MagickExport MagickBooleanType AccelerateCompositeImage(
7403  Image *magick_unused(image),const ChannelType magick_unused(channel),
7404  const CompositeOperator magick_unused(compose),
7405  const Image *magick_unused(composite),const ssize_t magick_unused(x_offset),
7406  const ssize_t magick_unused(y_offset),
7407  const float magick_unused(destination_dissolve),
7408  const float magick_unused(source_dissolve),
7409  ExceptionInfo *magick_unused(exception))
7410{
7411  magick_unreferenced(image);
7412  magick_unreferenced(channel);
7413  magick_unreferenced(compose);
7414  magick_unreferenced(composite);
7415  magick_unreferenced(x_offset);
7416  magick_unreferenced(y_offset);
7417  magick_unreferenced(destination_dissolve);
7418  magick_unreferenced(source_dissolve);
7419  magick_unreferenced(exception);
7420
7421  return(MagickFalse);
7422}
7423
7424MagickExport MagickBooleanType AccelerateContrastImage(
7425  Image* magick_unused(image),const MagickBooleanType magick_unused(sharpen),
7426  ExceptionInfo* magick_unused(exception))
7427{
7428  magick_unreferenced(image);
7429  magick_unreferenced(sharpen);
7430  magick_unreferenced(exception);
7431
7432  return(MagickFalse);
7433}
7434
7435MagickExport MagickBooleanType AccelerateContrastStretchImage(
7436  Image *magick_unused(image),const ChannelType magick_unused(channel),
7437  const double magick_unused(black_point),
7438  const double magick_unused(white_point),
7439  ExceptionInfo* magick_unused(exception))
7440{
7441  magick_unreferenced(image);
7442  magick_unreferenced(channel);
7443  magick_unreferenced(black_point);
7444  magick_unreferenced(white_point);
7445  magick_unreferenced(exception);
7446
7447  return(MagickFalse);
7448}
7449
7450MagickExport Image *AccelerateConvolveImage(
7451  const Image *magick_unused(image),const ChannelType magick_unused(channel),
7452  const KernelInfo *magick_unused(kernel),
7453  ExceptionInfo *magick_unused(exception))
7454{
7455  magick_unreferenced(image);
7456  magick_unreferenced(channel);
7457  magick_unreferenced(kernel);
7458  magick_unreferenced(exception);
7459
7460  return((Image *) NULL);
7461}
7462
7463MagickExport MagickBooleanType AccelerateEqualizeImage(
7464  Image* magick_unused(image), const ChannelType magick_unused(channel),
7465  ExceptionInfo* magick_unused(exception))
7466{
7467  magick_unreferenced(image);
7468  magick_unreferenced(channel);
7469  magick_unreferenced(exception);
7470
7471  return(MagickFalse);
7472}
7473
7474MagickExport Image *AccelerateDespeckleImage(const Image* magick_unused(image),
7475  ExceptionInfo* magick_unused(exception))
7476{
7477  magick_unreferenced(image);
7478  magick_unreferenced(exception);
7479
7480  return((Image *) NULL);
7481}
7482
7483MagickExport MagickBooleanType AccelerateFunctionImage(
7484  Image *magick_unused(image),const ChannelType magick_unused(channel),
7485  const MagickFunction magick_unused(function),
7486  const size_t magick_unused(number_parameters),
7487  const double *magick_unused(parameters),
7488  ExceptionInfo *magick_unused(exception))
7489{
7490  magick_unreferenced(image);
7491  magick_unreferenced(channel);
7492  magick_unreferenced(function);
7493  magick_unreferenced(number_parameters);
7494  magick_unreferenced(parameters);
7495  magick_unreferenced(exception);
7496
7497  return(MagickFalse);
7498}
7499
7500MagickExport MagickBooleanType AccelerateGrayscaleImage(
7501  Image *magick_unused(image),const PixelIntensityMethod magick_unused(method),
7502  ExceptionInfo *magick_unused(exception))
7503{
7504  magick_unreferenced(image);
7505  magick_unreferenced(method);
7506  magick_unreferenced(exception);
7507
7508  return(MagickFalse);
7509}
7510
7511MagickExport Image *AccelerateLocalContrastImage(
7512  const Image *magick_unused(image),const double magick_unused(radius),
7513  const double magick_unused(strength),ExceptionInfo *magick_unused(exception))
7514{
7515  magick_unreferenced(image);
7516  magick_unreferenced(radius);
7517  magick_unreferenced(strength);
7518  magick_unreferenced(exception);
7519
7520  return((Image *) NULL);
7521}
7522
7523MagickExport MagickBooleanType AccelerateModulateImage(
7524  Image *magick_unused(image),double magick_unused(percent_brightness),
7525  double magick_unused(percent_hue),double magick_unused(percent_saturation),
7526  ColorspaceType magick_unused(colorspace),
7527  ExceptionInfo *magick_unused(exception))
7528{
7529  magick_unreferenced(image);
7530  magick_unreferenced(percent_brightness);
7531  magick_unreferenced(percent_hue);
7532  magick_unreferenced(percent_saturation);
7533  magick_unreferenced(colorspace);
7534  magick_unreferenced(exception);
7535
7536  return(MagickFalse);
7537}
7538
7539MagickExport Image *AccelerateMotionBlurImage(
7540  const Image *magick_unused(image),const ChannelType magick_unused(channel),
7541  const double *magick_unused(kernel),const size_t magick_unused(width),
7542  const OffsetInfo *magick_unused(offset),
7543  ExceptionInfo *magick_unused(exception))
7544{
7545  magick_unreferenced(image);
7546  magick_unreferenced(channel);
7547  magick_unreferenced(kernel);
7548  magick_unreferenced(width);
7549  magick_unreferenced(offset);
7550  magick_unreferenced(exception);
7551
7552  return((Image *) NULL);
7553}
7554
7555MagickExport MagickBooleanType AccelerateRandomImage(
7556  Image *magick_unused(image),ExceptionInfo *magick_unused(exception))
7557{
7558  magick_unreferenced(image);
7559  magick_unreferenced(exception);
7560
7561  return MagickFalse;
7562}
7563
7564MagickExport Image *AccelerateResizeImage(const Image *magick_unused(image),
7565  const size_t magick_unused(resizedColumns),
7566  const size_t magick_unused(resizedRows),
7567  const ResizeFilter *magick_unused(resizeFilter),
7568  ExceptionInfo *magick_unused(exception))
7569{
7570  magick_unreferenced(image);
7571  magick_unreferenced(resizedColumns);
7572  magick_unreferenced(resizedRows);
7573  magick_unreferenced(resizeFilter);
7574  magick_unreferenced(exception);
7575
7576  return((Image *) NULL);
7577}
7578
7579MagickExport Image *AccelerateRotationalBlurImage(
7580  const Image *magick_unused(image),const ChannelType magick_unused(channel),
7581  const double magick_unused(angle),ExceptionInfo *magick_unused(exception))
7582{
7583  magick_unreferenced(image);
7584  magick_unreferenced(channel);
7585  magick_unreferenced(angle);
7586  magick_unreferenced(exception);
7587
7588  return((Image *) NULL);
7589}
7590
7591MagickExport Image *AccelerateUnsharpMaskImage(
7592  const Image *magick_unused(image),const ChannelType magick_unused(channel),
7593  const double magick_unused(radius),const double magick_unused(sigma),
7594  const double magick_unused(gain),const double magick_unused(threshold),
7595  ExceptionInfo *magick_unused(exception))
7596{
7597  magick_unreferenced(image);
7598  magick_unreferenced(channel);
7599  magick_unreferenced(radius);
7600  magick_unreferenced(sigma);
7601  magick_unreferenced(gain);
7602  magick_unreferenced(threshold);
7603  magick_unreferenced(exception);
7604
7605  return((Image *) NULL);
7606}
7607
7608#endif /* MAGICKCORE_OPENCL_SUPPORT */