accelerate.c revision a7d56f20e34f4d8ff05f9a3cbcce8789e27667cc
1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3%                                                                             %
4%                                                                             %
5%                                                                             %
6%     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7%    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8%    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9%    A   A   C       C      E      L      E      R R    A   A    T    E       %
10%    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11%                                                                             %
12%                                                                             %
13%                       MagickCore Acceleration Methods                       %
14%                                                                             %
15%                              Software Design                                %
16%                                  Cristy                                     %
17%                               SiuChi Chan                                   %
18%                              Guansong Zhang                                 %
19%                               January 2010                                  %
20%                               Dirk Lemstra                                  %
21%                                April 2016                                   %
22%                                                                             %
23%                                                                             %
24%  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
25%  dedicated to making software imaging solutions freely available.           %
26%                                                                             %
27%  You may not use this file except in compliance with the License.  You may  %
28%  obtain a copy of the License at                                            %
29%                                                                             %
30%    http://www.imagemagick.org/script/license.php                            %
31%                                                                             %
32%  Unless required by applicable law or agreed to in writing, software        %
33%  distributed under the License is distributed on an "AS IS" BASIS,          %
34%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
35%  See the License for the specific language governing permissions and        %
36%  limitations under the License.                                             %
37%                                                                             %
38%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39*/
40
41/*
42Include declarations.
43*/
44#include "MagickCore/studio.h"
45#include "MagickCore/accelerate-private.h"
46#include "MagickCore/accelerate-kernels-private.h"
47#include "MagickCore/artifact.h"
48#include "MagickCore/cache.h"
49#include "MagickCore/cache-private.h"
50#include "MagickCore/cache-view.h"
51#include "MagickCore/color-private.h"
52#include "MagickCore/delegate-private.h"
53#include "MagickCore/enhance.h"
54#include "MagickCore/exception.h"
55#include "MagickCore/exception-private.h"
56#include "MagickCore/gem.h"
57#include "MagickCore/image.h"
58#include "MagickCore/image-private.h"
59#include "MagickCore/linked-list.h"
60#include "MagickCore/list.h"
61#include "MagickCore/memory_.h"
62#include "MagickCore/monitor-private.h"
63#include "MagickCore/opencl.h"
64#include "MagickCore/opencl-private.h"
65#include "MagickCore/option.h"
66#include "MagickCore/pixel-accessor.h"
67#include "MagickCore/pixel-private.h"
68#include "MagickCore/prepress.h"
69#include "MagickCore/quantize.h"
70#include "MagickCore/quantum-private.h"
71#include "MagickCore/random_.h"
72#include "MagickCore/random-private.h"
73#include "MagickCore/registry.h"
74#include "MagickCore/resize.h"
75#include "MagickCore/resize-private.h"
76#include "MagickCore/semaphore.h"
77#include "MagickCore/splay-tree.h"
78#include "MagickCore/statistic.h"
79#include "MagickCore/string_.h"
80#include "MagickCore/string-private.h"
81#include "MagickCore/token.h"
82
83#define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84#define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85
86#if defined(MAGICKCORE_OPENCL_SUPPORT)
87
88/*
89  Define declarations.
90*/
91#define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92
93/*
94  Static declarations.
95*/
96static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97{
98  BoxWeightingFunction,
99  TriangleWeightingFunction,
100  HannWeightingFunction,
101  HammingWeightingFunction,
102  BlackmanWeightingFunction,
103  CubicBCWeightingFunction,
104  SincWeightingFunction,
105  SincFastWeightingFunction,
106  LastWeightingFunction
107};
108
109/*
110  Helper functions.
111*/
112static MagickBooleanType checkAccelerateCondition(const Image* image)
113{
114  /* check if the image's colorspace is supported */
115  if (image->colorspace != RGBColorspace &&
116      image->colorspace != sRGBColorspace &&
117      image->colorspace != GRAYColorspace)
118    return(MagickFalse);
119
120  /* check if the virtual pixel method is compatible with the OpenCL implementation */
121  if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
122      (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
123    return(MagickFalse);
124
125  /* check if the image has read / write mask */
126  if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
127    return(MagickFalse);
128
129  if (image->number_channels > 4)
130    return(MagickFalse);
131
132  /* check if pixel order is R */
133  if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
134    return(MagickFalse);
135
136  if (image->number_channels == 1)
137    return(MagickTrue);
138
139  /* check if pixel order is RA */
140  if ((image->number_channels == 2) &&
141      (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
142    return(MagickTrue);
143
144  if (image->number_channels == 2)
145    return(MagickFalse);
146
147  /* check if pixel order is RGB */
148  if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
149      (GetPixelChannelOffset(image,BluePixelChannel) != 2))
150    return(MagickFalse);
151
152  if (image->number_channels == 3)
153    return(MagickTrue);
154
155  /* check if pixel order is RGBA */
156  if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
157    return(MagickFalse);
158
159  return(MagickTrue);
160}
161
162static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
163{
164  if (checkAccelerateCondition(image) == MagickFalse)
165    return(MagickFalse);
166
167  /* the order will be RGBA if the image has 4 channels */
168  if (image->number_channels != 4)
169    return(MagickFalse);
170
171  if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
172      (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
173      (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
174      (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
175    return(MagickFalse);
176
177  return(MagickTrue);
178}
179
180static MagickBooleanType checkPixelIntensity(const Image *image,
181  const PixelIntensityMethod method)
182{
183  /* EncodePixelGamma and DecodePixelGamma are not supported */
184  if ((method == Rec601LumaPixelIntensityMethod) ||
185      (method == Rec709LumaPixelIntensityMethod))
186    {
187      if (image->colorspace == RGBColorspace)
188        return(MagickFalse);
189    }
190
191  if ((method == Rec601LuminancePixelIntensityMethod) ||
192      (method == Rec709LuminancePixelIntensityMethod))
193    {
194      if (image->colorspace == sRGBColorspace)
195        return(MagickFalse);
196    }
197
198  return(MagickTrue);
199}
200
201static MagickBooleanType checkHistogramCondition(const Image *image,
202  const PixelIntensityMethod method)
203{
204  /* ensure this is the only pass get in for now. */
205  if ((image->channel_mask & SyncChannels) == 0)
206    return MagickFalse;
207
208  return(checkPixelIntensity(image,method));
209}
210
211static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
212{
213  MagickCLEnv
214    clEnv;
215
216  clEnv=GetCurrentOpenCLEnv();
217  if (clEnv == (MagickCLEnv) NULL)
218    return((MagickCLEnv) NULL);
219
220  if (clEnv->enabled == MagickFalse)
221    return((MagickCLEnv) NULL);
222
223  if (InitializeOpenCL(clEnv,exception) == MagickFalse)
224    return((MagickCLEnv) NULL);
225
226  return(clEnv);
227}
228
229static inline void copyPixels(const Image *source,Image *destination,
230  ExceptionInfo *exception)
231{
232  RectangleInfo
233    geometry;
234
235  OffsetInfo
236    offset;
237
238  /* The pixels only need to be copied when a channel mask has been set */
239  if (((source->channel_mask & RedChannel) != 0) &&
240      ((source->channel_mask & GreenChannel) != 0) &&
241      ((source->channel_mask & BlueChannel) != 0) &&
242      ((source->channel_mask & AlphaChannel) != 0))
243    return;
244
245  geometry.width=source->columns;
246  geometry.height=source->rows;
247  geometry.x=geometry.y=offset.x=offset.y=0;
248  CopyImagePixels(destination,source,&geometry,&offset,exception);
249}
250
251/* pad the global workgroup size to the next multiple of
252   the local workgroup size */
253inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
254  const unsigned int orgGlobalSize,const unsigned int localGroupSize)
255{
256  return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
257}
258
259static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
260  const double sigma,cl_uint *width,ExceptionInfo *exception)
261{
262  char
263    geometry[MagickPathExtent];
264
265  cl_int
266    status;
267
268  cl_mem
269    imageKernelBuffer;
270
271  float
272    *kernelBufferPtr;
273
274  KernelInfo
275    *kernel;
276
277  ssize_t
278    i;
279
280  (void) FormatLocaleString(geometry,MagickPathExtent,
281    "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
282  kernel=AcquireKernelInfo(geometry,exception);
283  if (kernel == (KernelInfo *) NULL)
284  {
285    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
286      ResourceLimitWarning,"AcquireKernelInfo failed.",".");
287    return((cl_mem) NULL);
288  }
289  kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
290    sizeof(*kernelBufferPtr));
291  for (i = 0; i < (ssize_t) kernel->width; i++)
292    kernelBufferPtr[i] = (float)kernel->values[i];
293  imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
294    CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
295  *width=kernel->width;
296  kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
297  kernel=DestroyKernelInfo(kernel);
298  if (imageKernelBuffer == (cl_mem) NULL)
299    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
300      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
301  return(imageKernelBuffer);
302}
303
304static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
305  MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
306  cl_mem histogramBuffer,Image *image,const ChannelType channel,
307  ExceptionInfo *exception)
308{
309  MagickBooleanType
310    outputReady;
311
312  cl_int
313    clStatus;
314
315  cl_kernel
316    histogramKernel;
317
318  cl_event
319    event;
320
321  cl_uint
322    colorspace,
323    method;
324
325  register ssize_t
326    i;
327
328  size_t
329    global_work_size[2];
330
331  histogramKernel = NULL;
332
333  outputReady = MagickFalse;
334  colorspace = image->colorspace;
335  method = image->intensity;
336
337  /* get the OpenCL kernel */
338  histogramKernel = AcquireOpenCLKernel(device,"Histogram");
339  if (histogramKernel == NULL)
340  {
341    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
342    goto cleanup;
343  }
344
345  /* set the kernel arguments */
346  i = 0;
347  clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
348  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
349  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
350  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
351  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
352  if (clStatus != CL_SUCCESS)
353  {
354    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
355    goto cleanup;
356  }
357
358  /* launch the kernel */
359  global_work_size[0] = image->columns;
360  global_work_size[1] = image->rows;
361
362  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
363
364  if (clStatus != CL_SUCCESS)
365  {
366    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
367    goto cleanup;
368  }
369  RecordProfileData(device,histogramKernel,event);
370
371  outputReady = MagickTrue;
372
373cleanup:
374
375  if (histogramKernel!=NULL)
376    ReleaseOpenCLKernel(histogramKernel);
377
378  return(outputReady);
379}
380
381/*
382%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
383%                                                                             %
384%                                                                             %
385%                                                                             %
386%     A c c e l e r a t e A d d N o i s e I m a g e                           %
387%                                                                             %
388%                                                                             %
389%                                                                             %
390%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
391*/
392
393static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
394  const NoiseType noise_type,ExceptionInfo *exception)
395{
396  cl_command_queue
397    queue;
398
399  cl_float
400    attenuate;
401
402  cl_int
403    status;
404
405  cl_kernel
406    addNoiseKernel;
407
408  cl_mem
409    filteredImageBuffer,
410    imageBuffer;
411
412  cl_uint
413    bufferLength,
414    inputPixelCount,
415    number_channels,
416    numRandomNumberPerPixel,
417    pixelsPerWorkitem,
418    seed0,
419    seed1,
420    workItemCount;
421
422  const char
423    *option;
424
425  const unsigned long
426    *s;
427
428  MagickBooleanType
429    outputReady;
430
431  MagickCLDevice
432    device;
433
434  Image
435    *filteredImage;
436
437  RandomInfo
438    *randomInfo;
439
440  size_t
441    gsize[1],
442    i,
443    lsize[1],
444    numRandPerChannel;
445
446  filteredImage=NULL;
447  addNoiseKernel=NULL;
448  outputReady=MagickFalse;
449
450  device=RequestOpenCLDevice(clEnv);
451  queue=AcquireOpenCLCommandQueue(device);
452  if (queue == (cl_command_queue) NULL)
453    goto cleanup;
454  filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
455    exception);
456  if (filteredImage == (Image *) NULL)
457    goto cleanup;
458  if (filteredImage->number_channels != image->number_channels)
459    goto cleanup;
460  copyPixels(image,filteredImage,exception);
461  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
462  if (imageBuffer == (cl_mem) NULL)
463    goto cleanup;
464  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
465  if (filteredImageBuffer == (cl_mem) NULL)
466    goto cleanup;
467
468  /* find out how many random numbers needed by pixel */
469  numRandPerChannel=0;
470  numRandomNumberPerPixel=0;
471  switch (noise_type)
472  {
473    case UniformNoise:
474    case ImpulseNoise:
475    case LaplacianNoise:
476    case RandomNoise:
477    default:
478      numRandPerChannel=1;
479      break;
480    case GaussianNoise:
481    case MultiplicativeGaussianNoise:
482    case PoissonNoise:
483      numRandPerChannel=2;
484      break;
485  };
486  if (GetPixelRedTraits(image) != UndefinedPixelTrait)
487    numRandomNumberPerPixel+=numRandPerChannel;
488  if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
489    numRandomNumberPerPixel+=numRandPerChannel;
490  if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
491    numRandomNumberPerPixel+=numRandPerChannel;
492  if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
493    numRandomNumberPerPixel+=numRandPerChannel;
494
495  addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
496  if (addNoiseKernel == (cl_kernel) NULL)
497  {
498    (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
499      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
500    goto cleanup;
501  }
502
503  /* 256 work items per group, 2 groups per CU */
504  workItemCount=device->max_compute_units*2*256;
505  inputPixelCount=(cl_int) (image->columns*image->rows);
506  pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
507  pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
508  lsize[0]=256;
509  gsize[0]=workItemCount;
510
511  randomInfo=AcquireRandomInfo();
512  s=GetRandomInfoSeed(randomInfo);
513  seed0=s[0];
514  (void) GetPseudoRandomValue(randomInfo);
515  seed1=s[0];
516  randomInfo=DestroyRandomInfo(randomInfo);
517
518  number_channels=(cl_uint) image->number_channels;
519  bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
520  attenuate=1.0f;
521  option=GetImageArtifact(image,"attenuate");
522  if (option != (char *) NULL)
523    attenuate=(float)StringToDouble(option,(char **) NULL);
524
525  i=0;
526  status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
527  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
528  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
529  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
530  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
531  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
532  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&attenuate);
533  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
534  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
535  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
536  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
537  if (status != CL_SUCCESS)
538  {
539    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
540      ResourceLimitWarning,"clSetKernelArg failed.",".");
541    goto cleanup;
542  }
543
544  outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
545    lsize,image,filteredImage,exception);
546
547cleanup:
548
549  if (addNoiseKernel != (cl_kernel) NULL)
550    ReleaseOpenCLKernel(addNoiseKernel);
551  if (queue != (cl_command_queue) NULL)
552    ReleaseOpenCLCommandQueue(device,queue);
553  if (device != (MagickCLDevice) NULL)
554    ReleaseOpenCLDevice(device);
555  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
556    filteredImage=DestroyImage(filteredImage);
557
558  return(filteredImage);
559}
560
561MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
562  const NoiseType noise_type,ExceptionInfo *exception)
563{
564  Image
565    *filteredImage;
566
567  MagickCLEnv
568    clEnv;
569
570  assert(image != NULL);
571  assert(exception != (ExceptionInfo *) NULL);
572
573  if (checkAccelerateCondition(image) == MagickFalse)
574    return((Image *) NULL);
575
576  clEnv=getOpenCLEnvironment(exception);
577  if (clEnv == (MagickCLEnv) NULL)
578    return((Image *) NULL);
579
580  filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,exception);
581  return(filteredImage);
582}
583
584/*
585%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
586%                                                                             %
587%                                                                             %
588%                                                                             %
589%     A c c e l e r a t e B l u r I m a g e                                   %
590%                                                                             %
591%                                                                             %
592%                                                                             %
593%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
594*/
595
596static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
597  const double radius,const double sigma,ExceptionInfo *exception)
598{
599  cl_command_queue
600    queue;
601
602  cl_int
603    status;
604
605  cl_kernel
606    blurColumnKernel,
607    blurRowKernel;
608
609  cl_mem
610    filteredImageBuffer,
611    imageBuffer,
612    imageKernelBuffer,
613    tempImageBuffer;
614
615  cl_uint
616    imageColumns,
617    imageRows,
618    kernelWidth,
619    number_channels;
620
621  Image
622    *filteredImage;
623
624  MagickBooleanType
625    outputReady;
626
627  MagickCLDevice
628    device;
629
630  MagickSizeType
631    length;
632
633  size_t
634    chunkSize=256,
635    gsize[2],
636    i,
637    lsize[2];
638
639  filteredImage=NULL;
640  tempImageBuffer=NULL;
641  imageKernelBuffer=NULL;
642  blurRowKernel=NULL;
643  blurColumnKernel=NULL;
644  outputReady=MagickFalse;
645
646  device=RequestOpenCLDevice(clEnv);
647  queue=AcquireOpenCLCommandQueue(device);
648  filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
649    exception);
650  if (filteredImage == (Image *) NULL)
651    goto cleanup;
652  if (filteredImage->number_channels != image->number_channels)
653    goto cleanup;
654  copyPixels(image,filteredImage,exception);
655  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
656  if (imageBuffer == (cl_mem) NULL)
657    goto cleanup;
658  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
659  if (filteredImageBuffer == (cl_mem) NULL)
660    goto cleanup;
661
662  imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
663    exception);
664  if (imageKernelBuffer == (cl_mem) NULL)
665    goto cleanup;
666
667  length=image->columns*image->rows;
668  tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
669    sizeof(cl_float4),(void *) NULL);
670  if (tempImageBuffer == (cl_mem) NULL)
671    goto cleanup;
672
673  blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
674  if (blurRowKernel == (cl_kernel) NULL)
675  {
676    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
677      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
678    goto cleanup;
679  }
680
681  number_channels=(cl_uint) image->number_channels;
682  imageColumns=(cl_uint) image->columns;
683  imageRows=(cl_uint) image->rows;
684
685  i=0;
686  status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
687  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
688  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
689  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
690  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
691  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
692  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
693  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
694  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
695  if (status != CL_SUCCESS)
696  {
697    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
698      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
699    goto cleanup;
700  }
701
702  gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
703  gsize[1]=image->rows;
704  lsize[0]=chunkSize;
705  lsize[1]=1;
706
707  outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
708    lsize,image,filteredImage,exception);
709  if (outputReady == MagickFalse)
710    goto cleanup;
711
712  blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
713  if (blurColumnKernel == (cl_kernel) NULL)
714  {
715    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
716      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
717    goto cleanup;
718  }
719
720  i=0;
721  status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
722  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
723  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
724  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
725  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
726  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
727  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
728  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
729  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
730  if (status != CL_SUCCESS)
731  {
732    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
733      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
734    goto cleanup;
735  }
736
737  gsize[0]=image->columns;
738  gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
739  lsize[0]=1;
740  lsize[1]=chunkSize;
741
742  outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
743    lsize,image,filteredImage,exception);
744
745cleanup:
746
747  if (tempImageBuffer != (cl_mem) NULL)
748    ReleaseOpenCLMemObject(tempImageBuffer);
749  if (imageKernelBuffer != (cl_mem) NULL)
750    ReleaseOpenCLMemObject(imageKernelBuffer);
751  if (blurRowKernel != (cl_kernel) NULL)
752    ReleaseOpenCLKernel(blurRowKernel);
753  if (blurColumnKernel != (cl_kernel) NULL)
754    ReleaseOpenCLKernel(blurColumnKernel);
755  if (queue != (cl_command_queue) NULL)
756    ReleaseOpenCLCommandQueue(device,queue);
757  if (device != (MagickCLDevice) NULL)
758    ReleaseOpenCLDevice(device);
759  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
760    filteredImage=DestroyImage(filteredImage);
761
762  return(filteredImage);
763}
764
765MagickPrivate Image* AccelerateBlurImage(const Image *image,
766  const double radius,const double sigma,ExceptionInfo *exception)
767{
768  Image
769    *filteredImage;
770
771  MagickCLEnv
772    clEnv;
773
774  assert(image != NULL);
775  assert(exception != (ExceptionInfo *) NULL);
776
777  if (checkAccelerateCondition(image) == MagickFalse)
778    return((Image *) NULL);
779
780  clEnv=getOpenCLEnvironment(exception);
781  if (clEnv == (MagickCLEnv) NULL)
782    return((Image *) NULL);
783
784  filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
785  return(filteredImage);
786}
787
788/*
789%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
790%                                                                             %
791%                                                                             %
792%                                                                             %
793%     A c c e l e r a t e C o n t r a s t I m a g e                           %
794%                                                                             %
795%                                                                             %
796%                                                                             %
797%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
798*/
799
800static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
801  const MagickBooleanType sharpen,ExceptionInfo *exception)
802{
803  CacheView
804    *image_view;
805
806  cl_command_queue
807    queue;
808
809  cl_int
810    clStatus;
811
812  cl_kernel
813    filterKernel;
814
815  cl_event
816    event;
817
818  cl_mem
819    imageBuffer;
820
821  cl_mem_flags
822    mem_flags;
823
824  MagickBooleanType
825    outputReady;
826
827  MagickCLDevice
828    device;
829
830  MagickSizeType
831    length;
832
833  size_t
834    global_work_size[2];
835
836  unsigned int
837    i,
838    uSharpen;
839
840  void
841    *inputPixels;
842
843  outputReady = MagickFalse;
844  inputPixels = NULL;
845  imageBuffer = NULL;
846  filterKernel = NULL;
847  queue = NULL;
848
849  device = RequestOpenCLDevice(clEnv);
850
851  /* Create and initialize OpenCL buffers. */
852  image_view=AcquireAuthenticCacheView(image,exception);
853  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
854  if (inputPixels == (void *) NULL)
855  {
856    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
857    goto cleanup;
858  }
859
860  /* If the host pointer is aligned to the size of CLPixelPacket,
861     then use the host buffer directly from the GPU; otherwise,
862     create a buffer on the GPU and copy the data over */
863  if (ALIGNED(inputPixels,CLPixelPacket))
864  {
865    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
866  }
867  else
868  {
869    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
870  }
871  /* create a CL buffer from image pixel buffer */
872  length = image->columns * image->rows;
873  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
874  if (clStatus != CL_SUCCESS)
875  {
876    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
877    goto cleanup;
878  }
879
880  filterKernel = AcquireOpenCLKernel(device,"Contrast");
881  if (filterKernel == NULL)
882  {
883    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
884    goto cleanup;
885  }
886
887  i = 0;
888  clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
889
890  uSharpen = (sharpen == MagickFalse)?0:1;
891  clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
892  if (clStatus != CL_SUCCESS)
893  {
894    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
895    goto cleanup;
896  }
897
898  global_work_size[0] = image->columns;
899  global_work_size[1] = image->rows;
900  /* launch the kernel */
901  queue = AcquireOpenCLCommandQueue(device);
902  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
903  if (clStatus != CL_SUCCESS)
904  {
905    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
906    goto cleanup;
907  }
908  RecordProfileData(device,filterKernel,event);
909
910  if (ALIGNED(inputPixels,CLPixelPacket))
911  {
912    length = image->columns * image->rows;
913    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
914  }
915  else
916  {
917    length = image->columns * image->rows;
918    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
919  }
920  if (clStatus != CL_SUCCESS)
921  {
922    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
923    goto cleanup;
924  }
925  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
926
927cleanup:
928
929  image_view=DestroyCacheView(image_view);
930
931  if (imageBuffer!=NULL)
932    clEnv->library->clReleaseMemObject(imageBuffer);
933  if (filterKernel!=NULL)
934    ReleaseOpenCLKernel(filterKernel);
935  if (queue != NULL)
936    ReleaseOpenCLCommandQueue(device,queue);
937  if (device != NULL)
938    ReleaseOpenCLDevice(device);
939
940  return(outputReady);
941}
942
943MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
944  const MagickBooleanType sharpen,ExceptionInfo *exception)
945{
946  MagickBooleanType
947    status;
948
949  MagickCLEnv
950    clEnv;
951
952  assert(image != NULL);
953  assert(exception != (ExceptionInfo *) NULL);
954
955  if (checkAccelerateConditionRGBA(image) == MagickFalse)
956    return(MagickFalse);
957
958  clEnv=getOpenCLEnvironment(exception);
959  if (clEnv == (MagickCLEnv) NULL)
960    return(MagickFalse);
961
962  status=ComputeContrastImage(image,clEnv,sharpen,exception);
963  return(status);
964}
965
966/*
967%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
968%                                                                             %
969%                                                                             %
970%                                                                             %
971%     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
972%                                                                             %
973%                                                                             %
974%                                                                             %
975%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
976*/
977
978static MagickBooleanType ComputeContrastStretchImage(Image *image,
979  MagickCLEnv clEnv,const double black_point,const double white_point,
980  ExceptionInfo *exception)
981{
982#define ContrastStretchImageTag  "ContrastStretch/Image"
983#define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
984
985  CacheView
986    *image_view;
987
988  cl_command_queue
989    queue;
990
991  cl_int
992    clStatus;
993
994  cl_mem_flags
995    mem_flags;
996
997  cl_mem
998    histogramBuffer,
999    imageBuffer,
1000    stretchMapBuffer;
1001
1002  cl_kernel
1003    histogramKernel,
1004    stretchKernel;
1005
1006  cl_event
1007    event;
1008
1009  cl_uint4
1010    *histogram;
1011
1012  double
1013    intensity;
1014
1015  FloatPixelPacket
1016    black,
1017    white;
1018
1019  MagickBooleanType
1020    outputReady,
1021    status;
1022
1023  MagickCLDevice
1024    device;
1025
1026  MagickSizeType
1027    length;
1028
1029  PixelPacket
1030    *stretch_map;
1031
1032  register ssize_t
1033    i;
1034
1035  size_t
1036    global_work_size[2];
1037
1038  void
1039    *hostPtr,
1040    *inputPixels;
1041
1042  histogram=NULL;
1043  stretch_map=NULL;
1044  inputPixels = NULL;
1045  imageBuffer = NULL;
1046  histogramBuffer = NULL;
1047  stretchMapBuffer = NULL;
1048  histogramKernel = NULL;
1049  stretchKernel = NULL;
1050  queue = NULL;
1051  outputReady = MagickFalse;
1052
1053
1054  assert(image != (Image *) NULL);
1055  assert(image->signature == MagickCoreSignature);
1056  if (image->debug != MagickFalse)
1057    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1058
1059  //exception=(&image->exception);
1060
1061  /*
1062   * initialize opencl env
1063   */
1064  device = RequestOpenCLDevice(clEnv);
1065  queue = AcquireOpenCLCommandQueue(device);
1066
1067  /*
1068    Allocate and initialize histogram arrays.
1069  */
1070  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1071
1072  if (histogram == (cl_uint4 *) NULL)
1073    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1074
1075  /* reset histogram */
1076  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
1077
1078  /*
1079  if (IsGrayImage(image,exception) != MagickFalse)
1080    (void) SetImageColorspace(image,GRAYColorspace);
1081  */
1082
1083  status=MagickTrue;
1084
1085
1086  /*
1087    Form histogram.
1088  */
1089  /* Create and initialize OpenCL buffers. */
1090  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1091  /* assume this  will get a writable image */
1092  image_view=AcquireAuthenticCacheView(image,exception);
1093  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1094
1095  if (inputPixels == (void *) NULL)
1096  {
1097    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1098    goto cleanup;
1099  }
1100  /* If the host pointer is aligned to the size of CLPixelPacket,
1101     then use the host buffer directly from the GPU; otherwise,
1102     create a buffer on the GPU and copy the data over */
1103  if (ALIGNED(inputPixels,CLPixelPacket))
1104  {
1105    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1106  }
1107  else
1108  {
1109    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1110  }
1111  /* create a CL buffer from image pixel buffer */
1112  length = image->columns * image->rows;
1113  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1114  if (clStatus != CL_SUCCESS)
1115  {
1116    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1117    goto cleanup;
1118  }
1119
1120  /* If the host pointer is aligned to the size of cl_uint,
1121     then use the host buffer directly from the GPU; otherwise,
1122     create a buffer on the GPU and copy the data over */
1123  if (ALIGNED(histogram,cl_uint4))
1124  {
1125    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1126    hostPtr = histogram;
1127  }
1128  else
1129  {
1130    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1131    hostPtr = histogram;
1132  }
1133  /* create a CL buffer for histogram  */
1134  length = (MaxMap+1);
1135  histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1136  if (clStatus != CL_SUCCESS)
1137  {
1138    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1139    goto cleanup;
1140  }
1141
1142  status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1143  if (status == MagickFalse)
1144    goto cleanup;
1145
1146  /* read from the kenel output */
1147  if (ALIGNED(histogram,cl_uint4))
1148  {
1149    length = (MaxMap+1);
1150    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1151  }
1152  else
1153  {
1154    length = (MaxMap+1);
1155    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1156  }
1157  if (clStatus != CL_SUCCESS)
1158  {
1159    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1160    goto cleanup;
1161  }
1162
1163  /* unmap, don't block gpu to use this buffer again.  */
1164  if (ALIGNED(histogram,cl_uint4))
1165  {
1166    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1167    if (clStatus != CL_SUCCESS)
1168    {
1169      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1170      goto cleanup;
1171    }
1172  }
1173
1174  /* recreate input buffer later, in case image updated */
1175#ifdef RECREATEBUFFER
1176  if (imageBuffer!=NULL)
1177    clEnv->library->clReleaseMemObject(imageBuffer);
1178#endif
1179
1180  /* CPU stuff */
1181  /*
1182     Find the histogram boundaries by locating the black/white levels.
1183  */
1184  black.red=0.0;
1185  white.red=MaxRange(QuantumRange);
1186  if ((image->channel_mask & RedChannel) != 0)
1187  {
1188    intensity=0.0;
1189    for (i=0; i <= (ssize_t) MaxMap; i++)
1190    {
1191      intensity+=histogram[i].s[2];
1192      if (intensity > black_point)
1193        break;
1194    }
1195    black.red=(MagickRealType) i;
1196    intensity=0.0;
1197    for (i=(ssize_t) MaxMap; i != 0; i--)
1198    {
1199      intensity+=histogram[i].s[2];
1200      if (intensity > ((double) image->columns*image->rows-white_point))
1201        break;
1202    }
1203    white.red=(MagickRealType) i;
1204  }
1205  black.green=0.0;
1206  white.green=MaxRange(QuantumRange);
1207  if ((image->channel_mask & GreenChannel) != 0)
1208  {
1209    intensity=0.0;
1210    for (i=0; i <= (ssize_t) MaxMap; i++)
1211    {
1212      intensity+=histogram[i].s[2];
1213      if (intensity > black_point)
1214        break;
1215    }
1216    black.green=(MagickRealType) i;
1217    intensity=0.0;
1218    for (i=(ssize_t) MaxMap; i != 0; i--)
1219    {
1220      intensity+=histogram[i].s[2];
1221      if (intensity > ((double) image->columns*image->rows-white_point))
1222        break;
1223    }
1224    white.green=(MagickRealType) i;
1225  }
1226  black.blue=0.0;
1227  white.blue=MaxRange(QuantumRange);
1228  if ((image->channel_mask & BlueChannel) != 0)
1229  {
1230    intensity=0.0;
1231    for (i=0; i <= (ssize_t) MaxMap; i++)
1232    {
1233      intensity+=histogram[i].s[2];
1234      if (intensity > black_point)
1235        break;
1236    }
1237    black.blue=(MagickRealType) i;
1238    intensity=0.0;
1239    for (i=(ssize_t) MaxMap; i != 0; i--)
1240    {
1241      intensity+=histogram[i].s[2];
1242      if (intensity > ((double) image->columns*image->rows-white_point))
1243        break;
1244    }
1245    white.blue=(MagickRealType) i;
1246  }
1247  black.alpha=0.0;
1248  white.alpha=MaxRange(QuantumRange);
1249  if ((image->channel_mask & AlphaChannel) != 0)
1250  {
1251    intensity=0.0;
1252    for (i=0; i <= (ssize_t) MaxMap; i++)
1253    {
1254      intensity+=histogram[i].s[2];
1255      if (intensity > black_point)
1256        break;
1257    }
1258    black.alpha=(MagickRealType) i;
1259    intensity=0.0;
1260    for (i=(ssize_t) MaxMap; i != 0; i--)
1261    {
1262      intensity+=histogram[i].s[2];
1263      if (intensity > ((double) image->columns*image->rows-white_point))
1264        break;
1265    }
1266    white.alpha=(MagickRealType) i;
1267  }
1268  /*
1269  black.index=0.0;
1270  white.index=MaxRange(QuantumRange);
1271  if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
1272  {
1273    intensity=0.0;
1274    for (i=0; i <= (ssize_t) MaxMap; i++)
1275    {
1276      intensity+=histogram[i].index;
1277      if (intensity > black_point)
1278        break;
1279    }
1280    black.index=(MagickRealType) i;
1281    intensity=0.0;
1282    for (i=(ssize_t) MaxMap; i != 0; i--)
1283    {
1284      intensity+=histogram[i].index;
1285      if (intensity > ((double) image->columns*image->rows-white_point))
1286        break;
1287    }
1288    white.index=(MagickRealType) i;
1289  }
1290  */
1291
1292
1293  stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1294    sizeof(*stretch_map));
1295
1296  if (stretch_map == (PixelPacket *) NULL)
1297    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1298      image->filename);
1299
1300  /*
1301    Stretch the histogram to create the stretched image mapping.
1302  */
1303  (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1304  for (i=0; i <= (ssize_t) MaxMap; i++)
1305  {
1306    if ((image->channel_mask & RedChannel) != 0)
1307    {
1308      if (i < (ssize_t) black.red)
1309        stretch_map[i].red=(Quantum) 0;
1310      else
1311        if (i > (ssize_t) white.red)
1312          stretch_map[i].red=QuantumRange;
1313        else
1314          if (black.red != white.red)
1315            stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1316                  (i-black.red)/(white.red-black.red)));
1317    }
1318    if ((image->channel_mask & GreenChannel) != 0)
1319    {
1320      if (i < (ssize_t) black.green)
1321        stretch_map[i].green=0;
1322      else
1323        if (i > (ssize_t) white.green)
1324          stretch_map[i].green=QuantumRange;
1325        else
1326          if (black.green != white.green)
1327            stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1328                  (i-black.green)/(white.green-black.green)));
1329    }
1330    if ((image->channel_mask & BlueChannel) != 0)
1331    {
1332      if (i < (ssize_t) black.blue)
1333        stretch_map[i].blue=0;
1334      else
1335        if (i > (ssize_t) white.blue)
1336          stretch_map[i].blue= QuantumRange;
1337        else
1338          if (black.blue != white.blue)
1339            stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1340                  (i-black.blue)/(white.blue-black.blue)));
1341    }
1342    if ((image->channel_mask & AlphaChannel) != 0)
1343    {
1344      if (i < (ssize_t) black.alpha)
1345        stretch_map[i].alpha=0;
1346      else
1347        if (i > (ssize_t) white.alpha)
1348          stretch_map[i].alpha=QuantumRange;
1349        else
1350          if (black.alpha != white.alpha)
1351            stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1352                  (i-black.alpha)/(white.alpha-black.alpha)));
1353    }
1354    /*
1355    if (((channel & IndexChannel) != 0) &&
1356        (image->colorspace == CMYKColorspace))
1357    {
1358      if (i < (ssize_t) black.index)
1359        stretch_map[i].index=0;
1360      else
1361        if (i > (ssize_t) white.index)
1362          stretch_map[i].index=QuantumRange;
1363        else
1364          if (black.index != white.index)
1365            stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
1366                  (i-black.index)/(white.index-black.index)));
1367    }
1368    */
1369  }
1370
1371  /*
1372    Stretch the image.
1373  */
1374  if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1375      (image->colorspace == CMYKColorspace)))
1376    image->storage_class=DirectClass;
1377  if (image->storage_class == PseudoClass)
1378  {
1379    /*
1380       Stretch colormap.
1381       */
1382    for (i=0; i < (ssize_t) image->colors; i++)
1383    {
1384      if ((image->channel_mask & RedChannel) != 0)
1385      {
1386        if (black.red != white.red)
1387          image->colormap[i].red=stretch_map[
1388            ScaleQuantumToMap(image->colormap[i].red)].red;
1389      }
1390      if ((image->channel_mask & GreenChannel) != 0)
1391      {
1392        if (black.green != white.green)
1393          image->colormap[i].green=stretch_map[
1394            ScaleQuantumToMap(image->colormap[i].green)].green;
1395      }
1396      if ((image->channel_mask & BlueChannel) != 0)
1397      {
1398        if (black.blue != white.blue)
1399          image->colormap[i].blue=stretch_map[
1400            ScaleQuantumToMap(image->colormap[i].blue)].blue;
1401      }
1402      if ((image->channel_mask & AlphaChannel) != 0)
1403      {
1404        if (black.alpha != white.alpha)
1405          image->colormap[i].alpha=stretch_map[
1406            ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1407      }
1408    }
1409  }
1410
1411  /*
1412    Stretch image.
1413  */
1414
1415
1416  /* GPU can work on this again, image and equalize map as input
1417    image:        uchar4 (CLPixelPacket)
1418    stretch_map:  uchar4 (PixelPacket)
1419    black, white: float4 (FloatPixelPacket) */
1420
1421#ifdef RECREATEBUFFER
1422  /* If the host pointer is aligned to the size of CLPixelPacket,
1423     then use the host buffer directly from the GPU; otherwise,
1424     create a buffer on the GPU and copy the data over */
1425  if (ALIGNED(inputPixels,CLPixelPacket))
1426  {
1427    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1428  }
1429  else
1430  {
1431    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1432  }
1433  /* create a CL buffer from image pixel buffer */
1434  length = image->columns * image->rows;
1435  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1436  if (clStatus != CL_SUCCESS)
1437  {
1438    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1439    goto cleanup;
1440  }
1441#endif
1442
1443  /* Create and initialize OpenCL buffers. */
1444  if (ALIGNED(stretch_map, PixelPacket))
1445  {
1446    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1447    hostPtr = stretch_map;
1448  }
1449  else
1450  {
1451    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1452    hostPtr = stretch_map;
1453  }
1454  /* create a CL buffer for stretch_map  */
1455  length = (MaxMap+1);
1456  stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1457  if (clStatus != CL_SUCCESS)
1458  {
1459    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1460    goto cleanup;
1461  }
1462
1463  /* get the OpenCL kernel */
1464  stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1465  if (stretchKernel == NULL)
1466  {
1467    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1468    goto cleanup;
1469  }
1470
1471  /* set the kernel arguments */
1472  i = 0;
1473  clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1474  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1475  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1476  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
1477  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
1478  if (clStatus != CL_SUCCESS)
1479  {
1480    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1481    goto cleanup;
1482  }
1483
1484  /* launch the kernel */
1485  global_work_size[0] = image->columns;
1486  global_work_size[1] = image->rows;
1487
1488  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1489
1490  if (clStatus != CL_SUCCESS)
1491  {
1492    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1493    goto cleanup;
1494  }
1495  RecordProfileData(device,stretchKernel,event);
1496
1497  /* read the data back */
1498  if (ALIGNED(inputPixels,CLPixelPacket))
1499  {
1500    length = image->columns * image->rows;
1501    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1502  }
1503  else
1504  {
1505    length = image->columns * image->rows;
1506    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1507  }
1508  if (clStatus != CL_SUCCESS)
1509  {
1510    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1511    goto cleanup;
1512  }
1513
1514  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1515
1516cleanup:
1517
1518  image_view=DestroyCacheView(image_view);
1519
1520  if (imageBuffer!=NULL)
1521    clEnv->library->clReleaseMemObject(imageBuffer);
1522
1523  if (stretchMapBuffer!=NULL)
1524    clEnv->library->clReleaseMemObject(stretchMapBuffer);
1525  if (stretch_map!=NULL)
1526    stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1527  if (histogramBuffer!=NULL)
1528    clEnv->library->clReleaseMemObject(histogramBuffer);
1529  if (histogram!=NULL)
1530    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1531  if (histogramKernel!=NULL)
1532    ReleaseOpenCLKernel(histogramKernel);
1533  if (stretchKernel!=NULL)
1534    ReleaseOpenCLKernel(stretchKernel);
1535  if (queue != NULL)
1536    ReleaseOpenCLCommandQueue(device,queue);
1537  if (device != NULL)
1538    ReleaseOpenCLDevice(device);
1539
1540  return(outputReady);
1541}
1542
1543MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1544  Image *image,const double black_point,const double white_point,
1545  ExceptionInfo *exception)
1546{
1547  MagickBooleanType
1548    status;
1549
1550  MagickCLEnv
1551    clEnv;
1552
1553  assert(image != NULL);
1554  assert(exception != (ExceptionInfo *) NULL);
1555
1556  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1557      (checkHistogramCondition(image,image->intensity) == MagickFalse))
1558    return(MagickFalse);
1559
1560  clEnv=getOpenCLEnvironment(exception);
1561  if (clEnv == (MagickCLEnv) NULL)
1562    return(MagickFalse);
1563
1564  status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1565    exception);
1566  return(status);
1567}
1568
1569/*
1570%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1571%                                                                             %
1572%                                                                             %
1573%                                                                             %
1574%     A c c e l e r a t e C o n v o l v e I m a g e                           %
1575%                                                                             %
1576%                                                                             %
1577%                                                                             %
1578%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1579*/
1580
1581static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1582  const KernelInfo *kernel,ExceptionInfo *exception)
1583{
1584  CacheView
1585    *filteredImage_view,
1586    *image_view;
1587
1588  cl_command_queue
1589    queue;
1590
1591  cl_event
1592    event;
1593
1594  cl_kernel
1595    clkernel;
1596
1597  cl_int
1598    clStatus;
1599
1600  cl_mem
1601    convolutionKernel,
1602    filteredImageBuffer,
1603    imageBuffer;
1604
1605  cl_mem_flags
1606    mem_flags;
1607
1608  const void
1609    *inputPixels;
1610
1611  float
1612    *kernelBufferPtr;
1613
1614  Image
1615    *filteredImage;
1616
1617  MagickBooleanType
1618    outputReady;
1619
1620  MagickCLDevice
1621    device;
1622
1623  MagickSizeType
1624    length;
1625
1626  size_t
1627    global_work_size[3],
1628    localGroupSize[3],
1629    localMemoryRequirement;
1630
1631  unsigned
1632    kernelSize;
1633
1634  unsigned int
1635    filterHeight,
1636    filterWidth,
1637    i,
1638    imageHeight,
1639    imageWidth,
1640    matte;
1641
1642  void
1643    *filteredPixels,
1644    *hostPtr;
1645
1646  /* intialize all CL objects to NULL */
1647  imageBuffer = NULL;
1648  filteredImageBuffer = NULL;
1649  convolutionKernel = NULL;
1650  clkernel = NULL;
1651  queue = NULL;
1652
1653  filteredImage = NULL;
1654  filteredImage_view = NULL;
1655  outputReady = MagickFalse;
1656
1657  device = RequestOpenCLDevice(clEnv);
1658
1659  image_view=AcquireAuthenticCacheView(image,exception);
1660  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1661  if (inputPixels == (const void *) NULL)
1662  {
1663    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1664    goto cleanup;
1665  }
1666
1667  /* Create and initialize OpenCL buffers. */
1668
1669  /* If the host pointer is aligned to the size of CLPixelPacket,
1670     then use the host buffer directly from the GPU; otherwise,
1671     create a buffer on the GPU and copy the data over */
1672  if (ALIGNED(inputPixels,CLPixelPacket))
1673  {
1674    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1675  }
1676  else
1677  {
1678    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1679  }
1680  /* create a CL buffer from image pixel buffer */
1681  length = image->columns * image->rows;
1682  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1683  if (clStatus != CL_SUCCESS)
1684  {
1685    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1686    goto cleanup;
1687  }
1688
1689  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1690  assert(filteredImage != NULL);
1691  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1692  {
1693    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1694    goto cleanup;
1695  }
1696  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1697  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1698  if (filteredPixels == (void *) NULL)
1699  {
1700    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1701    goto cleanup;
1702  }
1703
1704  if (ALIGNED(filteredPixels,CLPixelPacket))
1705  {
1706    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1707    hostPtr = filteredPixels;
1708  }
1709  else
1710  {
1711    mem_flags = CL_MEM_WRITE_ONLY;
1712    hostPtr = NULL;
1713  }
1714  /* create a CL buffer from image pixel buffer */
1715  length = image->columns * image->rows;
1716  filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1717  if (clStatus != CL_SUCCESS)
1718  {
1719    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1720    goto cleanup;
1721  }
1722
1723  kernelSize = (unsigned int) (kernel->width * kernel->height);
1724  convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1725  if (clStatus != CL_SUCCESS)
1726  {
1727    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1728    goto cleanup;
1729  }
1730
1731  queue = AcquireOpenCLCommandQueue(device);
1732
1733  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1734          , 0, NULL, NULL, &clStatus);
1735  if (clStatus != CL_SUCCESS)
1736  {
1737    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1738    goto cleanup;
1739  }
1740  for (i = 0; i < kernelSize; i++)
1741  {
1742    kernelBufferPtr[i] = (float) kernel->values[i];
1743  }
1744  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1745  if (clStatus != CL_SUCCESS)
1746  {
1747    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1748    goto cleanup;
1749  }
1750
1751  /* Compute the local memory requirement for a 16x16 workgroup.
1752     If it's larger than 16k, reduce the workgroup size to 8x8 */
1753  localGroupSize[0] = 16;
1754  localGroupSize[1] = 16;
1755  localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1756    + kernel->width*kernel->height*sizeof(float);
1757
1758  if (localMemoryRequirement > device->local_memory_size)
1759  {
1760    localGroupSize[0] = 8;
1761    localGroupSize[1] = 8;
1762    localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1763      + kernel->width*kernel->height*sizeof(float);
1764  }
1765  if (localMemoryRequirement <= device->local_memory_size)
1766  {
1767    /* get the OpenCL kernel */
1768    clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1769    if (clkernel == NULL)
1770    {
1771      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1772      goto cleanup;
1773    }
1774
1775    /* set the kernel arguments */
1776    i = 0;
1777    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1778    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1779    imageWidth = (unsigned int) image->columns;
1780    imageHeight = (unsigned int) image->rows;
1781    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1782    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1783    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1784    filterWidth = (unsigned int) kernel->width;
1785    filterHeight = (unsigned int) kernel->height;
1786    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1787    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1788    matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1789    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1790    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1791    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1792    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1793    if (clStatus != CL_SUCCESS)
1794    {
1795      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1796      goto cleanup;
1797    }
1798
1799    /* pad the global size to a multiple of the local work size dimension */
1800    global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1801    global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1802
1803    /* launch the kernel */
1804    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1805    if (clStatus != CL_SUCCESS)
1806    {
1807      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1808      goto cleanup;
1809    }
1810    RecordProfileData(device,clkernel,event);
1811  }
1812  else
1813  {
1814    /* get the OpenCL kernel */
1815    clkernel = AcquireOpenCLKernel(device,"Convolve");
1816    if (clkernel == NULL)
1817    {
1818      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1819      goto cleanup;
1820    }
1821
1822    /* set the kernel arguments */
1823    i = 0;
1824    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1825    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1826    imageWidth = (unsigned int) image->columns;
1827    imageHeight = (unsigned int) image->rows;
1828    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1829    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1830    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1831    filterWidth = (unsigned int) kernel->width;
1832    filterHeight = (unsigned int) kernel->height;
1833    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1834    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1835    matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1836    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1837    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1838    if (clStatus != CL_SUCCESS)
1839    {
1840      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1841      goto cleanup;
1842    }
1843
1844    localGroupSize[0] = 8;
1845    localGroupSize[1] = 8;
1846    global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1847    global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1848	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1849
1850    if (clStatus != CL_SUCCESS)
1851    {
1852      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1853      goto cleanup;
1854    }
1855  }
1856  RecordProfileData(device,clkernel,event);
1857
1858  if (ALIGNED(filteredPixels,CLPixelPacket))
1859  {
1860    length = image->columns * image->rows;
1861    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1862  }
1863  else
1864  {
1865    length = image->columns * image->rows;
1866    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1867  }
1868  if (clStatus != CL_SUCCESS)
1869  {
1870    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1871    goto cleanup;
1872  }
1873
1874  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1875
1876cleanup:
1877
1878  image_view=DestroyCacheView(image_view);
1879  if (filteredImage_view != NULL)
1880    filteredImage_view=DestroyCacheView(filteredImage_view);
1881  if (imageBuffer != NULL)
1882    clEnv->library->clReleaseMemObject(imageBuffer);
1883  if (filteredImageBuffer != NULL)
1884    clEnv->library->clReleaseMemObject(filteredImageBuffer);
1885  if (convolutionKernel != NULL)
1886    clEnv->library->clReleaseMemObject(convolutionKernel);
1887  if (clkernel != NULL)
1888    ReleaseOpenCLKernel(clkernel);
1889  if (queue != NULL)
1890    ReleaseOpenCLCommandQueue(device,queue);
1891  if (device != NULL)
1892    ReleaseOpenCLDevice(device);
1893  if (outputReady == MagickFalse)
1894  {
1895    if (filteredImage != NULL)
1896    {
1897      DestroyImage(filteredImage);
1898      filteredImage = NULL;
1899    }
1900  }
1901
1902  return(filteredImage);
1903}
1904
1905MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1906  const KernelInfo *kernel,ExceptionInfo *exception)
1907{
1908  /* Temporary disabled due to access violation
1909
1910  Image
1911    *filteredImage;
1912
1913  assert(image != NULL);
1914  assert(kernel != (KernelInfo *) NULL);
1915  assert(exception != (ExceptionInfo *) NULL);
1916  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1917      (checkOpenCLEnvironment(exception) == MagickFalse))
1918    return((Image *) NULL);
1919
1920  filteredImage=ComputeConvolveImage(image,kernel,exception);
1921  return(filteredImage);
1922  */
1923  magick_unreferenced(image);
1924  magick_unreferenced(kernel);
1925  magick_unreferenced(exception);
1926  return((Image *)NULL);
1927}
1928
1929/*
1930%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1931%                                                                             %
1932%                                                                             %
1933%                                                                             %
1934%     A c c e l e r a t e D e s p e c k l e I m a g e                         %
1935%                                                                             %
1936%                                                                             %
1937%                                                                             %
1938%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1939*/
1940
1941static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1942  ExceptionInfo*exception)
1943{
1944  static const int
1945    X[4] = {0, 1, 1,-1},
1946    Y[4] = {1, 0, 1, 1};
1947
1948  CacheView
1949    *filteredImage_view,
1950    *image_view;
1951
1952  cl_command_queue
1953    queue;
1954
1955  cl_int
1956    clStatus;
1957
1958  cl_kernel
1959    hullPass1,
1960    hullPass2;
1961
1962  cl_event
1963    event;
1964
1965  cl_mem_flags
1966    mem_flags;
1967
1968  cl_mem
1969    filteredImageBuffer,
1970    imageBuffer,
1971    tempImageBuffer[2];
1972
1973  const void
1974    *inputPixels;
1975
1976  Image
1977    *filteredImage;
1978
1979  int
1980    k,
1981    matte;
1982
1983  MagickBooleanType
1984    outputReady;
1985
1986  MagickCLDevice
1987    device;
1988
1989  MagickSizeType
1990    length;
1991
1992  size_t
1993    global_work_size[2];
1994
1995  unsigned int
1996    imageHeight,
1997    imageWidth;
1998
1999  void
2000    *filteredPixels,
2001    *hostPtr;
2002
2003  outputReady = MagickFalse;
2004  inputPixels = NULL;
2005  filteredImage = NULL;
2006  filteredImage_view = NULL;
2007  filteredPixels = NULL;
2008  imageBuffer = NULL;
2009  filteredImageBuffer = NULL;
2010  hullPass1 = NULL;
2011  hullPass2 = NULL;
2012  queue = NULL;
2013  tempImageBuffer[0] = tempImageBuffer[1] = NULL;
2014
2015  device = RequestOpenCLDevice(clEnv);
2016  queue = AcquireOpenCLCommandQueue(device);
2017
2018  image_view=AcquireAuthenticCacheView(image,exception);
2019  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2020  if (inputPixels == (void *) NULL)
2021  {
2022    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2023    goto cleanup;
2024  }
2025
2026  if (ALIGNED(inputPixels,CLPixelPacket))
2027  {
2028    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2029  }
2030  else
2031  {
2032    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2033  }
2034  /* create a CL buffer from image pixel buffer */
2035  length = image->columns * image->rows;
2036  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2037  if (clStatus != CL_SUCCESS)
2038  {
2039    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2040    goto cleanup;
2041  }
2042
2043  mem_flags = CL_MEM_READ_WRITE;
2044  length = image->columns * image->rows;
2045  for (k = 0; k < 2; k++)
2046  {
2047    tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
2048    if (clStatus != CL_SUCCESS)
2049    {
2050      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2051      goto cleanup;
2052    }
2053  }
2054
2055  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2056  assert(filteredImage != NULL);
2057  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2058  {
2059    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
2060    goto cleanup;
2061  }
2062  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2063  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2064  if (filteredPixels == (void *) NULL)
2065  {
2066    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2067    goto cleanup;
2068  }
2069
2070  if (ALIGNED(filteredPixels,CLPixelPacket))
2071  {
2072    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2073    hostPtr = filteredPixels;
2074  }
2075  else
2076  {
2077    mem_flags = CL_MEM_WRITE_ONLY;
2078    hostPtr = NULL;
2079  }
2080  /* create a CL buffer from image pixel buffer */
2081  length = image->columns * image->rows;
2082  filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2083  if (clStatus != CL_SUCCESS)
2084  {
2085    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2086    goto cleanup;
2087  }
2088
2089  hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2090  hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2091
2092  clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2093  clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2094  imageWidth = (unsigned int) image->columns;
2095  clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2096  imageHeight = (unsigned int) image->rows;
2097  clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2098  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2099  clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2100  if (clStatus != CL_SUCCESS)
2101  {
2102    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2103    goto cleanup;
2104  }
2105
2106  clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2107  clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2108  imageWidth = (unsigned int) image->columns;
2109  clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2110  imageHeight = (unsigned int) image->rows;
2111  clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2112  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2113  clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2114  if (clStatus != CL_SUCCESS)
2115  {
2116    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2117    goto cleanup;
2118  }
2119
2120
2121  global_work_size[0] = image->columns;
2122  global_work_size[1] = image->rows;
2123
2124
2125  for (k = 0; k < 4; k++)
2126  {
2127    cl_int2 offset;
2128    int polarity;
2129
2130
2131    offset.s[0] = X[k];
2132    offset.s[1] = Y[k];
2133    polarity = 1;
2134    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2135    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2136    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2137    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2138    if (clStatus != CL_SUCCESS)
2139    {
2140      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2141      goto cleanup;
2142    }
2143    /* launch the kernel */
2144	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2145    if (clStatus != CL_SUCCESS)
2146    {
2147      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2148      goto cleanup;
2149    }
2150    RecordProfileData(device,hullPass1,event);
2151
2152    /* launch the kernel */
2153	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2154    if (clStatus != CL_SUCCESS)
2155    {
2156      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2157      goto cleanup;
2158    }
2159    RecordProfileData(device,hullPass2,event);
2160
2161    if (k == 0)
2162      clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2163    offset.s[0] = -X[k];
2164    offset.s[1] = -Y[k];
2165    polarity = 1;
2166    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2167    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2168    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2169    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2170    if (clStatus != CL_SUCCESS)
2171    {
2172      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2173      goto cleanup;
2174    }
2175    /* launch the kernel */
2176	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2177    if (clStatus != CL_SUCCESS)
2178    {
2179      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2180      goto cleanup;
2181    }
2182    RecordProfileData(device,hullPass1,event);
2183
2184    /* launch the kernel */
2185	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2186    if (clStatus != CL_SUCCESS)
2187    {
2188      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2189      goto cleanup;
2190    }
2191    RecordProfileData(device,hullPass2,event);
2192
2193    offset.s[0] = -X[k];
2194    offset.s[1] = -Y[k];
2195    polarity = -1;
2196    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2197    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2198    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2199    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2200    if (clStatus != CL_SUCCESS)
2201    {
2202      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2203      goto cleanup;
2204    }
2205    /* launch the kernel */
2206	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2207    if (clStatus != CL_SUCCESS)
2208    {
2209      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2210      goto cleanup;
2211    }
2212    RecordProfileData(device,hullPass1,event);
2213
2214    /* launch the kernel */
2215	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2216    if (clStatus != CL_SUCCESS)
2217    {
2218      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2219      goto cleanup;
2220    }
2221    RecordProfileData(device,hullPass2,event);
2222
2223    offset.s[0] = X[k];
2224    offset.s[1] = Y[k];
2225    polarity = -1;
2226    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2227    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2228    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2229    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2230
2231    if (k == 3)
2232      clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2233
2234    if (clStatus != CL_SUCCESS)
2235    {
2236      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2237      goto cleanup;
2238    }
2239    /* launch the kernel */
2240	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2241    if (clStatus != CL_SUCCESS)
2242    {
2243      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2244      goto cleanup;
2245    }
2246    RecordProfileData(device,hullPass1,event);
2247
2248    /* launch the kernel */
2249	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2250    if (clStatus != CL_SUCCESS)
2251    {
2252      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2253      goto cleanup;
2254    }
2255    RecordProfileData(device,hullPass2,event);
2256  }
2257
2258  if (ALIGNED(filteredPixels,CLPixelPacket))
2259  {
2260    length = image->columns * image->rows;
2261    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2262  }
2263  else
2264  {
2265    length = image->columns * image->rows;
2266    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2267  }
2268  if (clStatus != CL_SUCCESS)
2269  {
2270    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2271    goto cleanup;
2272  }
2273
2274  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2275
2276cleanup:
2277
2278  image_view=DestroyCacheView(image_view);
2279  if (filteredImage_view != NULL)
2280    filteredImage_view=DestroyCacheView(filteredImage_view);
2281
2282  if (queue != NULL)
2283    ReleaseOpenCLCommandQueue(device,queue);
2284  if (device != NULL)
2285    ReleaseOpenCLDevice(device);
2286  if (imageBuffer!=NULL)
2287    clEnv->library->clReleaseMemObject(imageBuffer);
2288  for (k = 0; k < 2; k++)
2289  {
2290    if (tempImageBuffer[k]!=NULL)
2291      clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2292  }
2293  if (filteredImageBuffer!=NULL)
2294    clEnv->library->clReleaseMemObject(filteredImageBuffer);
2295  if (hullPass1!=NULL)
2296    ReleaseOpenCLKernel(hullPass1);
2297  if (hullPass2!=NULL)
2298    ReleaseOpenCLKernel(hullPass2);
2299  if (outputReady == MagickFalse && filteredImage != NULL)
2300    filteredImage=DestroyImage(filteredImage);
2301
2302  return(filteredImage);
2303}
2304
2305MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2306  ExceptionInfo* exception)
2307{
2308  Image
2309    *filteredImage;
2310
2311  MagickCLEnv
2312    clEnv;
2313
2314  assert(image != NULL);
2315  assert(exception != (ExceptionInfo *) NULL);
2316
2317  if (checkAccelerateConditionRGBA(image) == MagickFalse)
2318    return((Image *) NULL);
2319
2320  clEnv=getOpenCLEnvironment(exception);
2321  if (clEnv == (MagickCLEnv) NULL)
2322    return((Image *) NULL);
2323
2324  filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2325  return(filteredImage);
2326}
2327
2328/*
2329%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2330%                                                                             %
2331%                                                                             %
2332%                                                                             %
2333%     A c c e l e r a t e E q u a l i z e I m a g e                           %
2334%                                                                             %
2335%                                                                             %
2336%                                                                             %
2337%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2338*/
2339
2340static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2341  ExceptionInfo *exception)
2342{
2343#define EqualizeImageTag  "Equalize/Image"
2344
2345  CacheView
2346    *image_view;
2347
2348  cl_command_queue
2349    queue;
2350
2351  cl_int
2352    clStatus;
2353
2354  cl_mem_flags
2355    mem_flags;
2356
2357  cl_mem
2358    equalizeMapBuffer,
2359    histogramBuffer,
2360    imageBuffer;
2361
2362  cl_kernel
2363    equalizeKernel,
2364    histogramKernel;
2365
2366  cl_event
2367    event;
2368
2369  cl_uint4
2370    *histogram;
2371
2372  FloatPixelPacket
2373    white,
2374    black,
2375    intensity,
2376    *map;
2377
2378  MagickBooleanType
2379    outputReady,
2380    status;
2381
2382  MagickCLDevice
2383    device;
2384
2385  MagickSizeType
2386    length;
2387
2388  PixelPacket
2389    *equalize_map;
2390
2391  register ssize_t
2392    i;
2393
2394  size_t
2395    global_work_size[2];
2396
2397  void
2398    *hostPtr,
2399    *inputPixels;
2400
2401  map=NULL;
2402  histogram=NULL;
2403  equalize_map=NULL;
2404  inputPixels = NULL;
2405  imageBuffer = NULL;
2406  histogramBuffer = NULL;
2407  equalizeMapBuffer = NULL;
2408  histogramKernel = NULL;
2409  equalizeKernel = NULL;
2410  queue = NULL;
2411  outputReady = MagickFalse;
2412
2413  assert(image != (Image *) NULL);
2414  assert(image->signature == MagickCoreSignature);
2415  if (image->debug != MagickFalse)
2416    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2417
2418  /*
2419   * initialize opencl env
2420   */
2421  device = RequestOpenCLDevice(clEnv);
2422  queue = AcquireOpenCLCommandQueue(device);
2423
2424  /*
2425    Allocate and initialize histogram arrays.
2426  */
2427  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2428  if (histogram == (cl_uint4 *) NULL)
2429      ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2430
2431  /* reset histogram */
2432  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
2433
2434  /* Create and initialize OpenCL buffers. */
2435  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2436  /* assume this  will get a writable image */
2437  image_view=AcquireAuthenticCacheView(image,exception);
2438  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2439
2440  if (inputPixels == (void *) NULL)
2441  {
2442    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2443    goto cleanup;
2444  }
2445  /* If the host pointer is aligned to the size of CLPixelPacket,
2446     then use the host buffer directly from the GPU; otherwise,
2447     create a buffer on the GPU and copy the data over */
2448  if (ALIGNED(inputPixels,CLPixelPacket))
2449  {
2450    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2451  }
2452  else
2453  {
2454    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2455  }
2456  /* create a CL buffer from image pixel buffer */
2457  length = image->columns * image->rows;
2458  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2459  if (clStatus != CL_SUCCESS)
2460  {
2461    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2462    goto cleanup;
2463  }
2464
2465  /* If the host pointer is aligned to the size of cl_uint,
2466     then use the host buffer directly from the GPU; otherwise,
2467     create a buffer on the GPU and copy the data over */
2468  if (ALIGNED(histogram,cl_uint4))
2469  {
2470    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2471    hostPtr = histogram;
2472  }
2473  else
2474  {
2475    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2476    hostPtr = histogram;
2477  }
2478  /* create a CL buffer for histogram  */
2479  length = (MaxMap+1);
2480  histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2481  if (clStatus != CL_SUCCESS)
2482  {
2483    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2484    goto cleanup;
2485  }
2486
2487  status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2488  if (status == MagickFalse)
2489    goto cleanup;
2490
2491  /* read from the kenel output */
2492  if (ALIGNED(histogram,cl_uint4))
2493  {
2494    length = (MaxMap+1);
2495    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2496  }
2497  else
2498  {
2499    length = (MaxMap+1);
2500    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2501  }
2502  if (clStatus != CL_SUCCESS)
2503  {
2504    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2505    goto cleanup;
2506  }
2507
2508  /* unmap, don't block gpu to use this buffer again.  */
2509  if (ALIGNED(histogram,cl_uint4))
2510  {
2511    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2512    if (clStatus != CL_SUCCESS)
2513    {
2514      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2515      goto cleanup;
2516    }
2517  }
2518
2519  /* recreate input buffer later, in case image updated */
2520#ifdef RECREATEBUFFER
2521  if (imageBuffer!=NULL)
2522    clEnv->library->clReleaseMemObject(imageBuffer);
2523#endif
2524
2525  /* CPU stuff */
2526  equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2527  if (equalize_map == (PixelPacket *) NULL)
2528    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2529
2530  map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2531  if (map == (FloatPixelPacket *) NULL)
2532    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2533
2534  /*
2535    Integrate the histogram to get the equalization map.
2536  */
2537  (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
2538  for (i=0; i <= (ssize_t) MaxMap; i++)
2539  {
2540    if ((image->channel_mask & SyncChannels) != 0)
2541    {
2542      intensity.red+=histogram[i].s[2];
2543      map[i]=intensity;
2544      continue;
2545    }
2546    if ((image->channel_mask & RedChannel) != 0)
2547      intensity.red+=histogram[i].s[2];
2548    if ((image->channel_mask & GreenChannel) != 0)
2549      intensity.green+=histogram[i].s[1];
2550    if ((image->channel_mask & BlueChannel) != 0)
2551      intensity.blue+=histogram[i].s[0];
2552    if ((image->channel_mask & AlphaChannel) != 0)
2553      intensity.alpha+=histogram[i].s[3];
2554    /*
2555    if (((channel & IndexChannel) != 0) &&
2556        (image->colorspace == CMYKColorspace))
2557    {
2558      intensity.index+=histogram[i].index;
2559    }
2560    */
2561    map[i]=intensity;
2562  }
2563  black=map[0];
2564  white=map[(int) MaxMap];
2565  (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2566  for (i=0; i <= (ssize_t) MaxMap; i++)
2567  {
2568    if ((image->channel_mask & SyncChannels) != 0)
2569    {
2570      if (white.red != black.red)
2571        equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2572                (map[i].red-black.red))/(white.red-black.red)));
2573      continue;
2574    }
2575    if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2576      equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2577              (map[i].red-black.red))/(white.red-black.red)));
2578    if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2579      equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2580              (map[i].green-black.green))/(white.green-black.green)));
2581    if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2582      equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2583              (map[i].blue-black.blue))/(white.blue-black.blue)));
2584    if (((image->channel_mask & AlphaChannel) != 0) && (white.alpha != black.alpha))
2585      equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2586              (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
2587    /*
2588    if ((((channel & IndexChannel) != 0) &&
2589          (image->colorspace == CMYKColorspace)) &&
2590        (white.index != black.index))
2591      equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2592              (map[i].index-black.index))/(white.index-black.index)));
2593    */
2594  }
2595
2596  if (image->storage_class == PseudoClass)
2597  {
2598    /*
2599       Equalize colormap.
2600       */
2601    for (i=0; i < (ssize_t) image->colors; i++)
2602    {
2603      if ((image->channel_mask & SyncChannels) != 0)
2604      {
2605        if (white.red != black.red)
2606        {
2607          image->colormap[i].red=equalize_map[
2608            ScaleQuantumToMap(image->colormap[i].red)].red;
2609          image->colormap[i].green=equalize_map[
2610            ScaleQuantumToMap(image->colormap[i].green)].red;
2611          image->colormap[i].blue=equalize_map[
2612            ScaleQuantumToMap(image->colormap[i].blue)].red;
2613          image->colormap[i].alpha=equalize_map[
2614            ScaleQuantumToMap(image->colormap[i].alpha)].red;
2615        }
2616        continue;
2617      }
2618      if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2619        image->colormap[i].red=equalize_map[
2620          ScaleQuantumToMap(image->colormap[i].red)].red;
2621      if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2622        image->colormap[i].green=equalize_map[
2623          ScaleQuantumToMap(image->colormap[i].green)].green;
2624      if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2625        image->colormap[i].blue=equalize_map[
2626          ScaleQuantumToMap(image->colormap[i].blue)].blue;
2627      if (((image->channel_mask & AlphaChannel) != 0) &&
2628          (white.alpha != black.alpha))
2629        image->colormap[i].alpha=equalize_map[
2630          ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2631    }
2632  }
2633
2634  /*
2635    Equalize image.
2636  */
2637
2638  /* GPU can work on this again, image and equalize map as input
2639    image:        uchar4 (CLPixelPacket)
2640    equalize_map: uchar4 (PixelPacket)
2641    black, white: float4 (FloatPixelPacket) */
2642
2643#ifdef RECREATEBUFFER
2644  /* If the host pointer is aligned to the size of CLPixelPacket,
2645     then use the host buffer directly from the GPU; otherwise,
2646     create a buffer on the GPU and copy the data over */
2647  if (ALIGNED(inputPixels,CLPixelPacket))
2648  {
2649    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2650  }
2651  else
2652  {
2653    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2654  }
2655  /* create a CL buffer from image pixel buffer */
2656  length = image->columns * image->rows;
2657  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2658  if (clStatus != CL_SUCCESS)
2659  {
2660    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2661    goto cleanup;
2662  }
2663#endif
2664
2665  /* Create and initialize OpenCL buffers. */
2666  if (ALIGNED(equalize_map, PixelPacket))
2667  {
2668    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2669    hostPtr = equalize_map;
2670  }
2671  else
2672  {
2673    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2674    hostPtr = equalize_map;
2675  }
2676  /* create a CL buffer for eqaulize_map  */
2677  length = (MaxMap+1);
2678  equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2679  if (clStatus != CL_SUCCESS)
2680  {
2681    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2682    goto cleanup;
2683  }
2684
2685  /* get the OpenCL kernel */
2686  equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2687  if (equalizeKernel == NULL)
2688  {
2689    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2690    goto cleanup;
2691  }
2692
2693  /* set the kernel arguments */
2694  i = 0;
2695  clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2696  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2697  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2698  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
2699  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
2700  if (clStatus != CL_SUCCESS)
2701  {
2702    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2703    goto cleanup;
2704  }
2705
2706  /* launch the kernel */
2707  global_work_size[0] = image->columns;
2708  global_work_size[1] = image->rows;
2709
2710  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2711
2712  if (clStatus != CL_SUCCESS)
2713  {
2714    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2715    goto cleanup;
2716  }
2717  RecordProfileData(device,equalizeKernel,event);
2718
2719  /* read the data back */
2720  if (ALIGNED(inputPixels,CLPixelPacket))
2721  {
2722    length = image->columns * image->rows;
2723    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2724  }
2725  else
2726  {
2727    length = image->columns * image->rows;
2728    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2729  }
2730  if (clStatus != CL_SUCCESS)
2731  {
2732    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2733    goto cleanup;
2734  }
2735
2736  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2737
2738cleanup:
2739
2740  image_view=DestroyCacheView(image_view);
2741
2742  if (imageBuffer!=NULL)
2743    clEnv->library->clReleaseMemObject(imageBuffer);
2744  if (map!=NULL)
2745    map=(FloatPixelPacket *) RelinquishMagickMemory(map);
2746  if (equalizeMapBuffer!=NULL)
2747    clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2748  if (equalize_map!=NULL)
2749    equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2750  if (histogramBuffer!=NULL)
2751    clEnv->library->clReleaseMemObject(histogramBuffer);
2752  if (histogram!=NULL)
2753    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2754  if (histogramKernel!=NULL)
2755    ReleaseOpenCLKernel(histogramKernel);
2756  if (equalizeKernel!=NULL)
2757    ReleaseOpenCLKernel(equalizeKernel);
2758  if (queue != NULL)
2759    ReleaseOpenCLCommandQueue(device, queue);
2760  if (device != NULL)
2761    ReleaseOpenCLDevice(device);
2762
2763  return(outputReady);
2764}
2765
2766MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2767  ExceptionInfo *exception)
2768{
2769  MagickBooleanType
2770    status;
2771
2772  MagickCLEnv
2773    clEnv;
2774
2775  assert(image != NULL);
2776  assert(exception != (ExceptionInfo *) NULL);
2777
2778  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2779      (checkHistogramCondition(image,image->intensity) == MagickFalse))
2780    return(MagickFalse);
2781
2782  clEnv=getOpenCLEnvironment(exception);
2783  if (clEnv == (MagickCLEnv) NULL)
2784    return(MagickFalse);
2785
2786  status=ComputeEqualizeImage(image,clEnv,exception);
2787  return(status);
2788}
2789
2790/*
2791%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2792%                                                                             %
2793%                                                                             %
2794%                                                                             %
2795%     A c c e l e r a t e F u n c t i o n I m a g e                           %
2796%                                                                             %
2797%                                                                             %
2798%                                                                             %
2799%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2800*/
2801
2802static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2803  const MagickFunction function,const size_t number_parameters,
2804  const double *parameters,ExceptionInfo *exception)
2805{
2806  cl_command_queue
2807    queue;
2808
2809  cl_int
2810    status;
2811
2812  cl_kernel
2813    functionKernel;
2814
2815  cl_mem
2816    imageBuffer,
2817    parametersBuffer;
2818
2819  cl_uint
2820    number_params,
2821    number_channels;
2822
2823  float
2824    *parametersBufferPtr;
2825
2826  MagickBooleanType
2827    outputReady;
2828
2829  MagickCLDevice
2830    device;
2831
2832  size_t
2833    gsize[2],
2834    i;
2835
2836  outputReady=MagickFalse;
2837
2838  functionKernel=NULL;
2839  parametersBuffer=NULL;
2840
2841  device=RequestOpenCLDevice(clEnv);
2842  queue=AcquireOpenCLCommandQueue(device);
2843  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2844  if (imageBuffer == (cl_mem) NULL)
2845    goto cleanup;
2846
2847  parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2848    sizeof(float));
2849  if (parametersBufferPtr == (float *) NULL)
2850    goto cleanup;
2851  for (i=0; i<number_parameters; i++)
2852    parametersBufferPtr[i]=(float) parameters[i];
2853  parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2854    CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2855    parametersBufferPtr);
2856  parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2857  if (parametersBuffer == (cl_mem) NULL)
2858  {
2859    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2860      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2861    goto cleanup;
2862  }
2863
2864  functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2865  if (functionKernel == (cl_kernel) NULL)
2866  {
2867    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2868      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2869    goto cleanup;
2870  }
2871
2872  number_channels=(cl_uint) image->number_channels;
2873  number_params=(cl_uint) number_parameters;
2874
2875  i=0;
2876  status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2877  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2878  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2879  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2880  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2881  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
2882  if (status != CL_SUCCESS)
2883  {
2884    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2885      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2886    goto cleanup;
2887  }
2888
2889  gsize[0]=image->columns;
2890  gsize[1]=image->rows;
2891  outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2892    gsize,(const size_t *) NULL,image,(const Image *) NULL,exception);
2893
2894cleanup:
2895
2896  if (parametersBuffer != (cl_mem) NULL)
2897    ReleaseOpenCLMemObject(parametersBuffer);
2898  if (functionKernel != (cl_kernel) NULL)
2899    ReleaseOpenCLKernel(functionKernel);
2900  if (queue != (cl_command_queue) NULL)
2901    ReleaseOpenCLCommandQueue(device,queue);
2902  if (device != (MagickCLDevice) NULL)
2903    ReleaseOpenCLDevice(device);
2904  return(outputReady);
2905}
2906
2907MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2908  const MagickFunction function,const size_t number_parameters,
2909  const double *parameters,ExceptionInfo *exception)
2910{
2911  MagickBooleanType
2912    status;
2913
2914  MagickCLEnv
2915    clEnv;
2916
2917  assert(image != NULL);
2918  assert(exception != (ExceptionInfo *) NULL);
2919
2920  if (checkAccelerateCondition(image) == MagickFalse)
2921    return(MagickFalse);
2922
2923  clEnv=getOpenCLEnvironment(exception);
2924  if (clEnv == (MagickCLEnv) NULL)
2925    return(MagickFalse);
2926
2927  status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2928    parameters,exception);
2929  return(status);
2930}
2931
2932/*
2933%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2934%                                                                             %
2935%                                                                             %
2936%                                                                             %
2937%     A c c e l e r a t e G r a y s c a l e I m a g e                         %
2938%                                                                             %
2939%                                                                             %
2940%                                                                             %
2941%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2942*/
2943
2944static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2945  const PixelIntensityMethod method,ExceptionInfo *exception)
2946{
2947  cl_command_queue
2948    queue;
2949
2950  cl_int
2951    status;
2952
2953  cl_kernel
2954    grayscaleKernel;
2955
2956  cl_mem
2957    imageBuffer;
2958
2959  cl_uint
2960    number_channels,
2961    colorspace,
2962    intensityMethod;
2963
2964  MagickBooleanType
2965    outputReady;
2966
2967  MagickCLDevice
2968    device;
2969
2970  size_t
2971    gsize[2],
2972    i;
2973
2974  outputReady=MagickFalse;
2975  grayscaleKernel=NULL;
2976
2977  assert(image != (Image *) NULL);
2978  assert(image->signature == MagickCoreSignature);
2979  device=RequestOpenCLDevice(clEnv);
2980  queue=AcquireOpenCLCommandQueue(device);
2981  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2982  if (imageBuffer == (cl_mem) NULL)
2983    goto cleanup;
2984
2985  grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2986  if (grayscaleKernel == (cl_kernel) NULL)
2987  {
2988    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2989      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2990    goto cleanup;
2991  }
2992
2993  number_channels=(cl_uint) image->number_channels;
2994  intensityMethod=(cl_uint) method;
2995  colorspace=(cl_uint) image->colorspace;
2996
2997  i=0;
2998  status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2999  status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
3000  status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
3001  status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
3002  if (status != CL_SUCCESS)
3003  {
3004    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
3005      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
3006    goto cleanup;
3007  }
3008
3009  gsize[0]=image->columns;
3010  gsize[1]=image->rows;
3011  outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
3012    (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
3013    exception);
3014
3015cleanup:
3016
3017  if (grayscaleKernel != (cl_kernel) NULL)
3018    ReleaseOpenCLKernel(grayscaleKernel);
3019  if (queue != (cl_command_queue) NULL)
3020    ReleaseOpenCLCommandQueue(device,queue);
3021  if (device != (MagickCLDevice) NULL)
3022    ReleaseOpenCLDevice(device);
3023
3024  return(outputReady);
3025}
3026
3027MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
3028  const PixelIntensityMethod method,ExceptionInfo *exception)
3029{
3030  MagickBooleanType
3031    status;
3032
3033  MagickCLEnv
3034    clEnv;
3035
3036  assert(image != NULL);
3037  assert(exception != (ExceptionInfo *) NULL);
3038
3039  if ((checkAccelerateCondition(image) == MagickFalse) ||
3040      (checkPixelIntensity(image,method) == MagickFalse))
3041    return(MagickFalse);
3042
3043  if (image->number_channels < 3)
3044    return(MagickFalse);
3045
3046  if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
3047      (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
3048      (GetPixelBlueTraits(image) == UndefinedPixelTrait))
3049    return(MagickFalse);
3050
3051  clEnv=getOpenCLEnvironment(exception);
3052  if (clEnv == (MagickCLEnv) NULL)
3053    return(MagickFalse);
3054
3055  status=ComputeGrayscaleImage(image,clEnv,method,exception);
3056  return(status);
3057}
3058
3059/*
3060%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3061%                                                                             %
3062%                                                                             %
3063%                                                                             %
3064%     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
3065%                                                                             %
3066%                                                                             %
3067%                                                                             %
3068%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3069*/
3070
3071static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
3072  const double radius,const double strength,ExceptionInfo *exception)
3073{
3074  CacheView
3075    *filteredImage_view,
3076    *image_view;
3077
3078  cl_command_queue
3079    queue;
3080
3081  cl_int
3082    clStatus,
3083    iRadius;
3084
3085  cl_kernel
3086    blurRowKernel,
3087    blurColumnKernel;
3088
3089  cl_event
3090    event;
3091
3092  cl_mem
3093    filteredImageBuffer,
3094    imageBuffer,
3095    imageKernelBuffer,
3096    tempImageBuffer;
3097
3098  cl_mem_flags
3099    mem_flags;
3100
3101  const void
3102    *inputPixels;
3103
3104  Image
3105    *filteredImage;
3106
3107  MagickBooleanType
3108    outputReady;
3109
3110  MagickCLDevice
3111    device;
3112
3113  MagickSizeType
3114    length;
3115
3116  void
3117    *filteredPixels,
3118    *hostPtr;
3119
3120  unsigned int
3121    i,
3122    imageColumns,
3123    imageRows,
3124    passes;
3125
3126  filteredImage = NULL;
3127  filteredImage_view = NULL;
3128  imageBuffer = NULL;
3129  filteredImageBuffer = NULL;
3130  tempImageBuffer = NULL;
3131  imageKernelBuffer = NULL;
3132  blurRowKernel = NULL;
3133  blurColumnKernel = NULL;
3134  queue = NULL;
3135  outputReady = MagickFalse;
3136
3137  device = RequestOpenCLDevice(clEnv);
3138  queue = AcquireOpenCLCommandQueue(device);
3139
3140  /* Create and initialize OpenCL buffers. */
3141  {
3142    image_view=AcquireAuthenticCacheView(image,exception);
3143    inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3144    if (inputPixels == (const void *) NULL)
3145    {
3146      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3147      goto cleanup;
3148    }
3149
3150    /* If the host pointer is aligned to the size of CLPixelPacket,
3151     then use the host buffer directly from the GPU; otherwise,
3152     create a buffer on the GPU and copy the data over */
3153    if (ALIGNED(inputPixels,CLPixelPacket))
3154    {
3155      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3156    }
3157    else
3158    {
3159      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3160    }
3161    /* create a CL buffer from image pixel buffer */
3162    length = image->columns * image->rows;
3163    imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3164    if (clStatus != CL_SUCCESS)
3165    {
3166      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3167      goto cleanup;
3168    }
3169  }
3170
3171  /* create output */
3172  {
3173    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
3174    assert(filteredImage != NULL);
3175    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3176    {
3177      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3178      goto cleanup;
3179    }
3180    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3181    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3182    if (filteredPixels == (void *) NULL)
3183    {
3184      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3185      goto cleanup;
3186    }
3187
3188    if (ALIGNED(filteredPixels,CLPixelPacket))
3189    {
3190      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3191      hostPtr = filteredPixels;
3192    }
3193    else
3194    {
3195      mem_flags = CL_MEM_WRITE_ONLY;
3196      hostPtr = NULL;
3197    }
3198
3199    /* create a CL buffer from image pixel buffer */
3200    length = image->columns * image->rows;
3201    filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3202    if (clStatus != CL_SUCCESS)
3203    {
3204      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3205      goto cleanup;
3206    }
3207  }
3208
3209  {
3210    /* create temp buffer */
3211    {
3212      length = image->columns * image->rows;
3213      tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3214      if (clStatus != CL_SUCCESS)
3215      {
3216        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3217        goto cleanup;
3218      }
3219    }
3220
3221    /* get the opencl kernel */
3222    {
3223      blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3224      if (blurRowKernel == NULL)
3225      {
3226        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3227        goto cleanup;
3228      };
3229
3230      blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3231      if (blurColumnKernel == NULL)
3232      {
3233        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3234        goto cleanup;
3235      };
3236    }
3237
3238    {
3239      imageColumns = (unsigned int) image->columns;
3240      imageRows = (unsigned int) image->rows;
3241      iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); // Normalized radius, 100% gives blur radius of 20% of the largest dimension
3242
3243      passes = (((1.0f * imageColumns) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3244      passes = (passes < 1) ? 1: passes;
3245
3246      /* set the kernel arguments */
3247      i = 0;
3248      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3249      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3250      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3251      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3252      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3253      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3254
3255      if (clStatus != CL_SUCCESS)
3256      {
3257        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3258        goto cleanup;
3259      }
3260    }
3261
3262    /* launch the kernel */
3263    {
3264      int x;
3265      for (x = 0; x < passes; ++x) {
3266        size_t gsize[2];
3267        size_t wsize[2];
3268        size_t goffset[2];
3269
3270        gsize[0] = 256;
3271        gsize[1] = image->rows / passes;
3272        wsize[0] = 256;
3273        wsize[1] = 1;
3274        goffset[0] = 0;
3275        goffset[1] = x * gsize[1];
3276
3277        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3278        if (clStatus != CL_SUCCESS)
3279        {
3280          (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3281          goto cleanup;
3282        }
3283        clEnv->library->clFlush(queue);
3284        RecordProfileData(device,blurRowKernel,event);
3285      }
3286    }
3287
3288    {
3289      cl_float FStrength = strength;
3290      i = 0;
3291      clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3292      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3293      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3294      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3295      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3296      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3297      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3298
3299      if (clStatus != CL_SUCCESS)
3300      {
3301        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3302        goto cleanup;
3303      }
3304    }
3305
3306    /* launch the kernel */
3307    {
3308      int x;
3309      for (x = 0; x < passes; ++x) {
3310        size_t gsize[2];
3311        size_t wsize[2];
3312        size_t goffset[2];
3313
3314        gsize[0] = ((image->columns + 3) / 4) * 4;
3315        gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3316        wsize[0] = 4;
3317        wsize[1] = 64;
3318        goffset[0] = 0;
3319        goffset[1] = x * gsize[1];
3320
3321        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3322        if (clStatus != CL_SUCCESS)
3323        {
3324          (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3325          goto cleanup;
3326        }
3327        clEnv->library->clFlush(queue);
3328        RecordProfileData(device,blurColumnKernel,event);
3329      }
3330    }
3331  }
3332
3333  /* get result */
3334  if (ALIGNED(filteredPixels,CLPixelPacket))
3335  {
3336    length = image->columns * image->rows;
3337    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3338  }
3339  else
3340  {
3341    length = image->columns * image->rows;
3342    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3343  }
3344  if (clStatus != CL_SUCCESS)
3345  {
3346    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3347    goto cleanup;
3348  }
3349
3350  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3351
3352cleanup:
3353
3354  image_view=DestroyCacheView(image_view);
3355  if (filteredImage_view != NULL)
3356    filteredImage_view=DestroyCacheView(filteredImage_view);
3357
3358  if (imageBuffer!=NULL)
3359    clEnv->library->clReleaseMemObject(imageBuffer);
3360  if (filteredImageBuffer!=NULL)
3361    clEnv->library->clReleaseMemObject(filteredImageBuffer);
3362  if (tempImageBuffer!=NULL)
3363    clEnv->library->clReleaseMemObject(tempImageBuffer);
3364  if (imageKernelBuffer!=NULL)
3365    clEnv->library->clReleaseMemObject(imageKernelBuffer);
3366  if (blurRowKernel!=NULL)
3367    ReleaseOpenCLKernel(blurRowKernel);
3368  if (blurColumnKernel!=NULL)
3369    ReleaseOpenCLKernel(blurColumnKernel);
3370  if (queue != NULL)
3371    ReleaseOpenCLCommandQueue(device, queue);
3372  if (device != NULL)
3373    ReleaseOpenCLDevice(device);
3374  if (outputReady == MagickFalse)
3375  {
3376    if (filteredImage != NULL)
3377    {
3378      DestroyImage(filteredImage);
3379      filteredImage = NULL;
3380    }
3381  }
3382
3383  return(filteredImage);
3384}
3385
3386MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3387  const double radius,const double strength,ExceptionInfo *exception)
3388{
3389  Image
3390    *filteredImage;
3391
3392  MagickCLEnv
3393    clEnv;
3394
3395  assert(image != NULL);
3396  assert(exception != (ExceptionInfo *) NULL);
3397
3398  if (checkAccelerateConditionRGBA(image) == MagickFalse)
3399    return((Image *) NULL);
3400
3401  clEnv=getOpenCLEnvironment(exception);
3402  if (clEnv == (MagickCLEnv) NULL)
3403    return((Image *) NULL);
3404
3405  filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3406    exception);
3407  return(filteredImage);
3408}
3409
3410/*
3411%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3412%                                                                             %
3413%                                                                             %
3414%                                                                             %
3415%     A c c e l e r a t e M o d u l a t e I m a g e                           %
3416%                                                                             %
3417%                                                                             %
3418%                                                                             %
3419%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3420*/
3421
3422static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3423  const double percent_brightness,const double percent_hue,
3424  const double percent_saturation,const ColorspaceType colorspace,
3425  ExceptionInfo *exception)
3426{
3427  CacheView
3428    *image_view;
3429
3430  cl_float
3431    bright,
3432    hue,
3433    saturation;
3434
3435  cl_command_queue
3436    queue;
3437
3438  cl_int
3439    color,
3440    clStatus;
3441
3442  cl_kernel
3443    modulateKernel;
3444
3445  cl_event
3446    event;
3447
3448  cl_mem
3449    imageBuffer;
3450
3451  cl_mem_flags
3452    mem_flags;
3453
3454  MagickBooleanType
3455    outputReady;
3456
3457  MagickCLDevice
3458    device;
3459
3460  MagickSizeType
3461    length;
3462
3463  register ssize_t
3464    i;
3465
3466  void
3467    *inputPixels;
3468
3469  inputPixels = NULL;
3470  imageBuffer = NULL;
3471  modulateKernel = NULL;
3472
3473  assert(image != (Image *) NULL);
3474  assert(image->signature == MagickCoreSignature);
3475  if (image->debug != MagickFalse)
3476    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3477
3478  /*
3479   * initialize opencl env
3480   */
3481  device = RequestOpenCLDevice(clEnv);
3482  queue = AcquireOpenCLCommandQueue(device);
3483
3484  outputReady = MagickFalse;
3485
3486  /* Create and initialize OpenCL buffers.
3487   inputPixels = AcquirePixelCachePixels(image, &length, exception);
3488   assume this  will get a writable image
3489   */
3490  image_view=AcquireAuthenticCacheView(image,exception);
3491  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3492  if (inputPixels == (void *) NULL)
3493  {
3494    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3495    goto cleanup;
3496  }
3497
3498  /* If the host pointer is aligned to the size of CLPixelPacket,
3499   then use the host buffer directly from the GPU; otherwise,
3500   create a buffer on the GPU and copy the data over
3501   */
3502  if (ALIGNED(inputPixels,CLPixelPacket))
3503  {
3504    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3505  }
3506  else
3507  {
3508    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3509  }
3510  /* create a CL buffer from image pixel buffer */
3511  length = image->columns * image->rows;
3512  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3513  if (clStatus != CL_SUCCESS)
3514  {
3515    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3516    goto cleanup;
3517  }
3518
3519  modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3520  if (modulateKernel == NULL)
3521  {
3522    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3523    goto cleanup;
3524  }
3525
3526  bright=percent_brightness;
3527  hue=percent_hue;
3528  saturation=percent_saturation;
3529  color=colorspace;
3530
3531  i = 0;
3532  clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3533  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3534  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3535  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3536  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3537  if (clStatus != CL_SUCCESS)
3538  {
3539    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3540    goto cleanup;
3541  }
3542
3543  {
3544    size_t global_work_size[2];
3545    global_work_size[0] = image->columns;
3546    global_work_size[1] = image->rows;
3547    /* launch the kernel */
3548	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3549    if (clStatus != CL_SUCCESS)
3550    {
3551      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3552      goto cleanup;
3553    }
3554    RecordProfileData(device,modulateKernel,event);
3555  }
3556
3557  if (ALIGNED(inputPixels,CLPixelPacket))
3558  {
3559    length = image->columns * image->rows;
3560    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3561  }
3562  else
3563  {
3564    length = image->columns * image->rows;
3565    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3566  }
3567  if (clStatus != CL_SUCCESS)
3568  {
3569    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3570    goto cleanup;
3571  }
3572
3573  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3574
3575cleanup:
3576
3577  image_view=DestroyCacheView(image_view);
3578
3579  if (imageBuffer!=NULL)
3580    clEnv->library->clReleaseMemObject(imageBuffer);
3581  if (modulateKernel!=NULL)
3582    ReleaseOpenCLKernel(modulateKernel);
3583  if (queue != NULL)
3584    ReleaseOpenCLCommandQueue(device,queue);
3585  if (device != NULL)
3586    ReleaseOpenCLDevice(device);
3587
3588  return outputReady;
3589
3590}
3591
3592MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3593  const double percent_brightness,const double percent_hue,
3594  const double percent_saturation,const ColorspaceType colorspace,
3595  ExceptionInfo *exception)
3596{
3597  MagickBooleanType
3598    status;
3599
3600  MagickCLEnv
3601    clEnv;
3602
3603  assert(image != NULL);
3604  assert(exception != (ExceptionInfo *) NULL);
3605
3606  if (checkAccelerateConditionRGBA(image) == MagickFalse)
3607    return(MagickFalse);
3608
3609  if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3610    return(MagickFalse);
3611
3612  clEnv=getOpenCLEnvironment(exception);
3613  if (clEnv == (MagickCLEnv) NULL)
3614    return(MagickFalse);
3615
3616  status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3617    percent_saturation,colorspace,exception);
3618  return(status);
3619}
3620
3621/*
3622%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3623%                                                                             %
3624%                                                                             %
3625%                                                                             %
3626%     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
3627%                                                                             %
3628%                                                                             %
3629%                                                                             %
3630%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3631*/
3632
3633static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3634  const double *kernel,const size_t width,const OffsetInfo *offset,
3635  ExceptionInfo *exception)
3636{
3637  CacheView
3638    *filteredImage_view,
3639    *image_view;
3640
3641  cl_command_queue
3642    queue;
3643
3644  cl_float4
3645    biasPixel;
3646
3647  cl_int
3648    clStatus;
3649
3650  cl_kernel
3651    motionBlurKernel;
3652
3653  cl_event
3654    event;
3655
3656  cl_mem
3657    filteredImageBuffer,
3658    imageBuffer,
3659    imageKernelBuffer,
3660    offsetBuffer;
3661
3662  cl_mem_flags
3663    mem_flags;
3664
3665  const void
3666    *inputPixels;
3667
3668  float
3669    *kernelBufferPtr;
3670
3671  Image
3672    *filteredImage;
3673
3674  int
3675    *offsetBufferPtr;
3676
3677  MagickBooleanType
3678    outputReady;
3679
3680  MagickCLDevice
3681    device;
3682
3683  PixelInfo
3684    bias;
3685
3686  MagickSizeType
3687    length;
3688
3689  size_t
3690    global_work_size[2],
3691    local_work_size[2];
3692
3693  unsigned int
3694    i,
3695    imageHeight,
3696    imageWidth,
3697    matte;
3698
3699  void
3700    *filteredPixels,
3701    *hostPtr;
3702
3703  outputReady = MagickFalse;
3704  filteredImage = NULL;
3705  filteredImage_view = NULL;
3706  imageBuffer = NULL;
3707  filteredImageBuffer = NULL;
3708  imageKernelBuffer = NULL;
3709  motionBlurKernel = NULL;
3710  queue = NULL;
3711
3712  device = RequestOpenCLDevice(clEnv);
3713
3714  /* Create and initialize OpenCL buffers. */
3715
3716  image_view=AcquireAuthenticCacheView(image,exception);
3717  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3718  if (inputPixels == (const void *) NULL)
3719  {
3720    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3721      "UnableToReadPixelCache.","`%s'",image->filename);
3722    goto cleanup;
3723  }
3724
3725  // If the host pointer is aligned to the size of CLPixelPacket,
3726  // then use the host buffer directly from the GPU; otherwise,
3727  // create a buffer on the GPU and copy the data over
3728  if (ALIGNED(inputPixels,CLPixelPacket))
3729  {
3730    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3731  }
3732  else
3733  {
3734    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3735  }
3736  // create a CL buffer from image pixel buffer
3737  length = image->columns * image->rows;
3738  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3739    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3740  if (clStatus != CL_SUCCESS)
3741  {
3742    (void) ThrowMagickException(exception, GetMagickModule(),
3743      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3744    goto cleanup;
3745  }
3746
3747
3748  filteredImage = CloneImage(image,image->columns,image->rows,
3749    MagickTrue,exception);
3750  assert(filteredImage != NULL);
3751  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3752  {
3753    (void) ThrowMagickException(exception, GetMagickModule(),
3754      ResourceLimitError, "CloneImage failed.", ".");
3755    goto cleanup;
3756  }
3757  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3758  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3759  if (filteredPixels == (void *) NULL)
3760  {
3761    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3762      "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3763    goto cleanup;
3764  }
3765
3766  if (ALIGNED(filteredPixels,CLPixelPacket))
3767  {
3768    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3769    hostPtr = filteredPixels;
3770  }
3771  else
3772  {
3773    mem_flags = CL_MEM_WRITE_ONLY;
3774    hostPtr = NULL;
3775  }
3776  // create a CL buffer from image pixel buffer
3777  length = image->columns * image->rows;
3778  filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3779    length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3780  if (clStatus != CL_SUCCESS)
3781  {
3782    (void) ThrowMagickException(exception, GetMagickModule(),
3783      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3784    goto cleanup;
3785  }
3786
3787
3788  imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3789    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3790    &clStatus);
3791  if (clStatus != CL_SUCCESS)
3792  {
3793    (void) ThrowMagickException(exception, GetMagickModule(),
3794      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3795    goto cleanup;
3796  }
3797
3798  queue = AcquireOpenCLCommandQueue(device);
3799  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3800    CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3801  if (clStatus != CL_SUCCESS)
3802  {
3803    (void) ThrowMagickException(exception, GetMagickModule(),
3804      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3805    goto cleanup;
3806  }
3807  for (i = 0; i < width; i++)
3808  {
3809    kernelBufferPtr[i] = (float) kernel[i];
3810  }
3811  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3812    0, NULL, NULL);
3813 if (clStatus != CL_SUCCESS)
3814  {
3815    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3816      "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3817    goto cleanup;
3818  }
3819
3820  offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3821    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3822    &clStatus);
3823  if (clStatus != CL_SUCCESS)
3824  {
3825    (void) ThrowMagickException(exception, GetMagickModule(),
3826      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3827    goto cleanup;
3828  }
3829
3830  offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3831    CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3832  if (clStatus != CL_SUCCESS)
3833  {
3834    (void) ThrowMagickException(exception, GetMagickModule(),
3835      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3836    goto cleanup;
3837  }
3838  for (i = 0; i < width; i++)
3839  {
3840    offsetBufferPtr[2*i] = (int)offset[i].x;
3841    offsetBufferPtr[2*i+1] = (int)offset[i].y;
3842  }
3843  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3844    NULL, NULL);
3845 if (clStatus != CL_SUCCESS)
3846  {
3847    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3848      "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3849    goto cleanup;
3850  }
3851
3852
3853 // get the OpenCL kernel
3854  motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3855  if (motionBlurKernel == NULL)
3856  {
3857    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3858      "AcquireOpenCLKernel failed.", ".");
3859    goto cleanup;
3860  }
3861
3862  // set the kernel arguments
3863  i = 0;
3864  clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3865    (void *)&imageBuffer);
3866  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3867    (void *)&filteredImageBuffer);
3868  imageWidth = (unsigned int) image->columns;
3869  imageHeight = (unsigned int) image->rows;
3870  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3871    &imageWidth);
3872  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3873    &imageHeight);
3874  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3875    (void *)&imageKernelBuffer);
3876  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3877    &width);
3878  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3879    (void *)&offsetBuffer);
3880
3881  GetPixelInfo(image,&bias);
3882  biasPixel.s[0] = bias.red;
3883  biasPixel.s[1] = bias.green;
3884  biasPixel.s[2] = bias.blue;
3885  biasPixel.s[3] = bias.alpha;
3886  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3887
3888  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3889  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3890  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3891  if (clStatus != CL_SUCCESS)
3892  {
3893    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3894      "clEnv->library->clSetKernelArg failed.", ".");
3895    goto cleanup;
3896  }
3897
3898  // launch the kernel
3899  local_work_size[0] = 16;
3900  local_work_size[1] = 16;
3901  global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3902                                (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3903  global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3904                                (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3905  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3906	  global_work_size, local_work_size, 0, NULL, &event);
3907
3908  if (clStatus != CL_SUCCESS)
3909  {
3910    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3911      "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3912    goto cleanup;
3913  }
3914  RecordProfileData(device,motionBlurKernel,event);
3915
3916  if (ALIGNED(filteredPixels,CLPixelPacket))
3917  {
3918    length = image->columns * image->rows;
3919    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3920      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3921      NULL, &clStatus);
3922  }
3923  else
3924  {
3925    length = image->columns * image->rows;
3926    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3927      length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3928  }
3929  if (clStatus != CL_SUCCESS)
3930  {
3931    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3932      "Reading output image from CL buffer failed.", ".");
3933    goto cleanup;
3934  }
3935  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3936
3937cleanup:
3938
3939  image_view=DestroyCacheView(image_view);
3940  if (filteredImage_view != NULL)
3941    filteredImage_view=DestroyCacheView(filteredImage_view);
3942
3943  if (filteredImageBuffer!=NULL)
3944    clEnv->library->clReleaseMemObject(filteredImageBuffer);
3945  if (imageBuffer!=NULL)
3946    clEnv->library->clReleaseMemObject(imageBuffer);
3947  if (imageKernelBuffer!=NULL)
3948    clEnv->library->clReleaseMemObject(imageKernelBuffer);
3949  if (motionBlurKernel!=NULL)
3950    ReleaseOpenCLKernel(motionBlurKernel);
3951  if (queue != NULL)
3952    ReleaseOpenCLCommandQueue(device,queue);
3953  if (device != NULL)
3954    ReleaseOpenCLDevice(device);
3955  if (outputReady == MagickFalse && filteredImage != NULL)
3956    filteredImage=DestroyImage(filteredImage);
3957
3958  return(filteredImage);
3959}
3960
3961MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3962  const double* kernel,const size_t width,const OffsetInfo *offset,
3963  ExceptionInfo *exception)
3964{
3965  Image
3966    *filteredImage;
3967
3968  MagickCLEnv
3969    clEnv;
3970
3971  assert(image != NULL);
3972  assert(kernel != (double *) NULL);
3973  assert(offset != (OffsetInfo *) NULL);
3974  assert(exception != (ExceptionInfo *) NULL);
3975
3976  if (checkAccelerateConditionRGBA(image) == MagickFalse)
3977    return((Image *) NULL);
3978
3979  clEnv=getOpenCLEnvironment(exception);
3980  if (clEnv == (MagickCLEnv) NULL)
3981    return((Image *) NULL);
3982
3983  filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3984    exception);
3985  return(filteredImage);
3986}
3987
3988/*
3989%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3990%                                                                             %
3991%                                                                             %
3992%                                                                             %
3993%     A c c e l e r a t e R e s i z e I m a g e                               %
3994%                                                                             %
3995%                                                                             %
3996%                                                                             %
3997%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3998*/
3999
4000static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
4001  cl_command_queue queue,const Image *image,Image *filteredImage,
4002  cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4003  cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4004  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4005  const float xFactor,ExceptionInfo *exception)
4006{
4007  cl_kernel
4008    horizontalKernel;
4009
4010  cl_int
4011    status;
4012
4013  const unsigned int
4014    workgroupSize = 256;
4015
4016  float
4017    resizeFilterScale,
4018    resizeFilterSupport,
4019    resizeFilterWindowSupport,
4020    resizeFilterBlur,
4021    scale,
4022    support;
4023
4024  int
4025    cacheRangeStart,
4026    cacheRangeEnd,
4027    numCachedPixels,
4028    resizeFilterType,
4029    resizeWindowType;
4030
4031  MagickBooleanType
4032    outputReady;
4033
4034  size_t
4035    gammaAccumulatorLocalMemorySize,
4036    gsize[2],
4037    i,
4038    imageCacheLocalMemorySize,
4039    pixelAccumulatorLocalMemorySize,
4040    lsize[2],
4041    totalLocalMemorySize,
4042    weightAccumulatorLocalMemorySize;
4043
4044  unsigned int
4045    chunkSize,
4046    pixelPerWorkgroup;
4047
4048  horizontalKernel=NULL;
4049  outputReady=MagickFalse;
4050
4051  /*
4052  Apply filter to resize vertically from image to resize image.
4053  */
4054  scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
4055  support=scale*GetResizeFilterSupport(resizeFilter);
4056  if (support < 0.5)
4057  {
4058    /*
4059    Support too small even for nearest neighbour: Reduce to point
4060    sampling.
4061    */
4062    support=(MagickRealType) 0.5;
4063    scale=1.0;
4064  }
4065  scale=PerceptibleReciprocal(scale);
4066
4067  if (resizedColumns < workgroupSize)
4068  {
4069    chunkSize=32;
4070    pixelPerWorkgroup=32;
4071  }
4072  else
4073  {
4074    chunkSize=workgroupSize;
4075    pixelPerWorkgroup=workgroupSize;
4076  }
4077
4078DisableMSCWarning(4127)
4079  while(1)
4080RestoreMSCWarning
4081  {
4082    /* calculate the local memory size needed per workgroup */
4083    cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
4084    cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
4085      MagickEpsilon)+support+0.5);
4086    numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4087    imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4088      number_channels;
4089    totalLocalMemorySize=imageCacheLocalMemorySize;
4090
4091    /* local size for the pixel accumulator */
4092    pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4093    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4094
4095    /* local memory size for the weight accumulator */
4096    weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4097    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4098
4099    /* local memory size for the gamma accumulator */
4100    if ((number_channels == 4) || (number_channels == 2))
4101      gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4102    else
4103      gammaAccumulatorLocalMemorySize=sizeof(float);
4104    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4105
4106    if (totalLocalMemorySize <= device->local_memory_size)
4107      break;
4108    else
4109    {
4110      pixelPerWorkgroup=pixelPerWorkgroup/2;
4111      chunkSize=chunkSize/2;
4112      if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4113      {
4114        /* quit, fallback to CPU */
4115        goto cleanup;
4116      }
4117    }
4118  }
4119
4120  resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4121  resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4122
4123  horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4124  if (horizontalKernel == (cl_kernel) NULL)
4125  {
4126    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4127      ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4128    goto cleanup;
4129  }
4130
4131  resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4132  resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4133  resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4134  resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4135
4136  i=0;
4137  status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4138  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4139  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4140  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4141  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4142  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4143  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4144  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4145  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4146  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4147  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4148  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4149  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4150  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4151  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4152  status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4153  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4154  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4155  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4156  status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4157  status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4158  status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4159
4160  if (status != CL_SUCCESS)
4161  {
4162    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4163      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4164    goto cleanup;
4165  }
4166
4167  gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4168    workgroupSize;
4169  gsize[1]=resizedRows;
4170  lsize[0]=workgroupSize;
4171  lsize[1]=1;
4172  outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4173    (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4174cleanup:
4175
4176  if (horizontalKernel != (cl_kernel) NULL)
4177    ReleaseOpenCLKernel(horizontalKernel);
4178
4179  return(outputReady);
4180}
4181
4182static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4183  cl_command_queue queue,const Image *image,Image * filteredImage,
4184  cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4185  cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4186  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4187  const float yFactor,ExceptionInfo *exception)
4188{
4189  cl_kernel
4190    verticalKernel;
4191
4192  cl_int
4193    status;
4194
4195  const unsigned int
4196    workgroupSize = 256;
4197
4198  float
4199    resizeFilterScale,
4200    resizeFilterSupport,
4201    resizeFilterWindowSupport,
4202    resizeFilterBlur,
4203    scale,
4204    support;
4205
4206  int
4207    cacheRangeStart,
4208    cacheRangeEnd,
4209    numCachedPixels,
4210    resizeFilterType,
4211    resizeWindowType;
4212
4213  MagickBooleanType
4214    outputReady;
4215
4216  size_t
4217    gammaAccumulatorLocalMemorySize,
4218    gsize[2],
4219    i,
4220    imageCacheLocalMemorySize,
4221    pixelAccumulatorLocalMemorySize,
4222    lsize[2],
4223    totalLocalMemorySize,
4224    weightAccumulatorLocalMemorySize;
4225
4226  unsigned int
4227    chunkSize,
4228    pixelPerWorkgroup;
4229
4230  verticalKernel=NULL;
4231  outputReady=MagickFalse;
4232
4233  /*
4234  Apply filter to resize vertically from image to resize image.
4235  */
4236  scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4237  support=scale*GetResizeFilterSupport(resizeFilter);
4238  if (support < 0.5)
4239  {
4240    /*
4241    Support too small even for nearest neighbour: Reduce to point
4242    sampling.
4243    */
4244    support=(MagickRealType) 0.5;
4245    scale=1.0;
4246  }
4247  scale=PerceptibleReciprocal(scale);
4248
4249  if (resizedRows < workgroupSize)
4250  {
4251    chunkSize=32;
4252    pixelPerWorkgroup=32;
4253  }
4254  else
4255  {
4256    chunkSize=workgroupSize;
4257    pixelPerWorkgroup=workgroupSize;
4258  }
4259
4260DisableMSCWarning(4127)
4261  while(1)
4262RestoreMSCWarning
4263  {
4264    /* calculate the local memory size needed per workgroup */
4265    cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4266    cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4267      MagickEpsilon)+support+0.5);
4268    numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4269    imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4270      number_channels;
4271    totalLocalMemorySize=imageCacheLocalMemorySize;
4272
4273    /* local size for the pixel accumulator */
4274    pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4275    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4276
4277    /* local memory size for the weight accumulator */
4278    weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4279    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4280
4281    /* local memory size for the gamma accumulator */
4282    if ((number_channels == 4) || (number_channels == 2))
4283      gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4284    else
4285      gammaAccumulatorLocalMemorySize=sizeof(float);
4286    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4287
4288    if (totalLocalMemorySize <= device->local_memory_size)
4289      break;
4290    else
4291    {
4292      pixelPerWorkgroup=pixelPerWorkgroup/2;
4293      chunkSize=chunkSize/2;
4294      if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4295      {
4296        /* quit, fallback to CPU */
4297        goto cleanup;
4298      }
4299    }
4300  }
4301
4302  resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4303  resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4304
4305  verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4306  if (verticalKernel == (cl_kernel) NULL)
4307  {
4308    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4309      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4310    goto cleanup;
4311  }
4312
4313  resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4314  resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4315  resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4316  resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4317
4318  i=0;
4319  status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4320  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4321  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4322  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4323  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4324  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4325  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4326  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4327  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4328  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4329  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4330  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4331  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4332  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4333  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4334  status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4335  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4336  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4337  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4338  status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4339  status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4340  status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4341
4342  if (status != CL_SUCCESS)
4343  {
4344    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4345      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4346    goto cleanup;
4347  }
4348
4349  gsize[0]=resizedColumns;
4350  gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4351    workgroupSize;
4352  lsize[0]=1;
4353  lsize[1]=workgroupSize;
4354  outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4355    gsize,lsize,image,filteredImage,exception);
4356
4357cleanup:
4358
4359  if (verticalKernel != (cl_kernel) NULL)
4360    ReleaseOpenCLKernel(verticalKernel);
4361
4362  return(outputReady);
4363}
4364
4365static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4366  const size_t resizedColumns,const size_t resizedRows,
4367  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4368{
4369  cl_command_queue
4370    queue;
4371
4372  cl_mem
4373    cubicCoefficientsBuffer,
4374    filteredImageBuffer,
4375    imageBuffer,
4376    tempImageBuffer;
4377
4378  cl_uint
4379    number_channels;
4380
4381  const double
4382    *resizeFilterCoefficient;
4383
4384  float
4385    coefficientBuffer[7],
4386    xFactor,
4387    yFactor;
4388
4389  MagickBooleanType
4390    outputReady;
4391
4392  MagickCLDevice
4393    device;
4394
4395  MagickSizeType
4396    length;
4397
4398  Image
4399    *filteredImage;
4400
4401  size_t
4402    i;
4403
4404  filteredImage=NULL;
4405  tempImageBuffer=NULL;
4406  cubicCoefficientsBuffer=NULL;
4407  outputReady=MagickFalse;
4408
4409  device=RequestOpenCLDevice(clEnv);
4410  queue=AcquireOpenCLCommandQueue(device);
4411  filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4412    exception);
4413  if (filteredImage == (Image *) NULL)
4414    goto cleanup;
4415  if (filteredImage->number_channels != image->number_channels)
4416    goto cleanup;
4417  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4418  if (imageBuffer == (cl_mem) NULL)
4419    goto cleanup;
4420  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4421  if (filteredImageBuffer == (cl_mem) NULL)
4422    goto cleanup;
4423
4424  resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4425  for (i = 0; i < 7; i++)
4426    coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4427  cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4428    CL_MEM_READ_ONLY,7*sizeof(*resizeFilterCoefficient),&coefficientBuffer);
4429  if (cubicCoefficientsBuffer == (cl_mem) NULL)
4430  {
4431    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4432      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4433    goto cleanup;
4434  }
4435
4436  number_channels=(cl_uint) image->number_channels;
4437  xFactor=(float) resizedColumns/(float) image->columns;
4438  yFactor=(float) resizedRows/(float) image->rows;
4439  if (xFactor > yFactor)
4440  {
4441    length=resizedColumns*image->rows*number_channels;
4442    tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4443      sizeof(CLQuantum),(void *) NULL);
4444    if (tempImageBuffer == (cl_mem) NULL)
4445    {
4446      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4447        ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4448      goto cleanup;
4449    }
4450
4451    outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4452      imageBuffer,number_channels,(cl_uint) image->columns,
4453      (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4454      (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4455      exception);
4456    if (outputReady == MagickFalse)
4457      goto cleanup;
4458
4459    outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4460      tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4461      (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4462      (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4463      exception);
4464    if (outputReady == MagickFalse)
4465      goto cleanup;
4466  }
4467  else
4468  {
4469    length=image->columns*resizedRows*number_channels;
4470    tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,
4471      length*sizeof(CLQuantum),(void *) NULL);
4472    if (tempImageBuffer == (cl_mem) NULL)
4473    {
4474      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4475        ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4476      goto cleanup;
4477    }
4478
4479    outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4480      imageBuffer,number_channels,(cl_uint) image->columns,
4481      (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4482      (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4483      exception);
4484    if (outputReady == MagickFalse)
4485      goto cleanup;
4486
4487    outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4488      tempImageBuffer,number_channels,(cl_uint) image->columns,
4489      (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4490      (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4491      exception);
4492    if (outputReady == MagickFalse)
4493      goto cleanup;
4494  }
4495
4496cleanup:
4497
4498  if (tempImageBuffer != (cl_mem) NULL)
4499    ReleaseOpenCLMemObject(tempImageBuffer);
4500  if (cubicCoefficientsBuffer != (cl_mem) NULL)
4501    ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4502  if (queue != (cl_command_queue) NULL)
4503    ReleaseOpenCLCommandQueue(device,queue);
4504  if (device != (MagickCLDevice) NULL)
4505    ReleaseOpenCLDevice(device);
4506  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4507    filteredImage=DestroyImage(filteredImage);
4508
4509  return(filteredImage);
4510}
4511
4512static MagickBooleanType gpuSupportedResizeWeighting(
4513  ResizeWeightingFunctionType f)
4514{
4515  unsigned int
4516    i;
4517
4518  for (i = 0; ;i++)
4519  {
4520    if (supportedResizeWeighting[i] == LastWeightingFunction)
4521      break;
4522    if (supportedResizeWeighting[i] == f)
4523      return(MagickTrue);
4524  }
4525  return(MagickFalse);
4526}
4527
4528MagickPrivate Image *AccelerateResizeImage(const Image *image,
4529  const size_t resizedColumns,const size_t resizedRows,
4530  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4531{
4532  Image
4533    *filteredImage;
4534
4535  MagickCLEnv
4536    clEnv;
4537
4538  assert(image != NULL);
4539  assert(exception != (ExceptionInfo *) NULL);
4540
4541  if (checkAccelerateCondition(image) == MagickFalse)
4542    return((Image *) NULL);
4543
4544  if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4545         resizeFilter)) == MagickFalse) ||
4546      (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4547         resizeFilter)) == MagickFalse))
4548    return((Image *) NULL);
4549
4550  clEnv=getOpenCLEnvironment(exception);
4551  if (clEnv == (MagickCLEnv) NULL)
4552    return((Image *) NULL);
4553
4554  filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4555    resizeFilter,exception);
4556  return(filteredImage);
4557}
4558
4559/*
4560%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4561%                                                                             %
4562%                                                                             %
4563%                                                                             %
4564%     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
4565%                                                                             %
4566%                                                                             %
4567%                                                                             %
4568%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4569*/
4570
4571static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4572  const double angle,ExceptionInfo *exception)
4573{
4574  cl_command_queue
4575    queue;
4576
4577  cl_float2
4578    blurCenter;
4579
4580  cl_float4
4581    biasPixel;
4582
4583  cl_int
4584    status;
4585
4586  cl_mem
4587    cosThetaBuffer,
4588    filteredImageBuffer,
4589    imageBuffer,
4590    sinThetaBuffer;
4591
4592  cl_kernel
4593    rotationalBlurKernel;
4594
4595  cl_uint
4596    cossin_theta_size,
4597    number_channels;
4598
4599  float
4600    blurRadius,
4601    *cosThetaPtr,
4602    offset,
4603    *sinThetaPtr,
4604    theta;
4605
4606  Image
4607    *filteredImage;
4608
4609  MagickBooleanType
4610    outputReady;
4611
4612  MagickCLDevice
4613    device;
4614
4615  PixelInfo
4616    bias;
4617
4618  size_t
4619    gsize[2],
4620    i;
4621
4622  filteredImage=NULL;
4623  sinThetaBuffer=NULL;
4624  cosThetaBuffer=NULL;
4625  rotationalBlurKernel=NULL;
4626  outputReady=MagickFalse;
4627
4628  device=RequestOpenCLDevice(clEnv);
4629  queue=AcquireOpenCLCommandQueue(device);
4630  filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
4631    exception);
4632  if (filteredImage == (Image *) NULL)
4633    goto cleanup;
4634  if (filteredImage->number_channels != image->number_channels)
4635    goto cleanup;
4636  copyPixels(image,filteredImage,exception);
4637  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4638  if (imageBuffer == (cl_mem) NULL)
4639    goto cleanup;
4640  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4641  if (filteredImageBuffer == (cl_mem) NULL)
4642    goto cleanup;
4643
4644  blurCenter.s[0]=(float) (image->columns-1)/2.0;
4645  blurCenter.s[1]=(float) (image->rows-1)/2.0;
4646  blurRadius=hypot(blurCenter.s[0],blurCenter.s[1]);
4647  cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt((double)blurRadius)+2UL);
4648
4649  cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4650  if (cosThetaPtr == (float *) NULL)
4651    goto cleanup;
4652  sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4653  if (sinThetaPtr == (float *) NULL)
4654  {
4655    cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4656    goto cleanup;
4657  }
4658
4659  theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
4660  offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
4661  for (i=0; i < (ssize_t) cossin_theta_size; i++)
4662  {
4663    cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4664    sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4665  }
4666
4667  sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4668    CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4669  sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4670  cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4671    CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4672  cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4673  if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem)NULL))
4674  {
4675    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4676      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4677    goto cleanup;
4678  }
4679
4680  rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4681  if (rotationalBlurKernel == (cl_kernel) NULL)
4682  {
4683    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4684      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4685    goto cleanup;
4686  }
4687
4688  GetPixelInfo(image,&bias);
4689  biasPixel.s[0]=bias.red;
4690  biasPixel.s[1]=bias.green;
4691  biasPixel.s[2]=bias.blue;
4692  biasPixel.s[3]=bias.alpha;
4693
4694  number_channels=(cl_uint) image->number_channels;
4695
4696  i=0;
4697  status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4698  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4699  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4700  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float4), &biasPixel);
4701  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4702  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4703  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4704  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4705  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4706  if (status != CL_SUCCESS)
4707  {
4708    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4709      ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4710    goto cleanup;
4711  }
4712
4713  gsize[0]=image->columns;
4714  gsize[1]=image->rows;
4715  outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4716    (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4717    exception);
4718
4719cleanup:
4720
4721  if (sinThetaBuffer != (cl_mem) NULL)
4722    ReleaseOpenCLMemObject(sinThetaBuffer);
4723  if (cosThetaBuffer != (cl_mem) NULL)
4724    ReleaseOpenCLMemObject(cosThetaBuffer);
4725  if (rotationalBlurKernel != (cl_kernel) NULL)
4726    ReleaseOpenCLKernel(rotationalBlurKernel);
4727  if (queue != (cl_command_queue) NULL)
4728    ReleaseOpenCLCommandQueue(device,queue);
4729  if (device != (MagickCLDevice) NULL)
4730    ReleaseOpenCLDevice(device);
4731  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4732    filteredImage=DestroyImage(filteredImage);
4733
4734  return(filteredImage);
4735}
4736
4737MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4738  const double angle,ExceptionInfo *exception)
4739{
4740  Image
4741    *filteredImage;
4742
4743  MagickCLEnv
4744    clEnv;
4745
4746  assert(image != NULL);
4747  assert(exception != (ExceptionInfo *) NULL);
4748
4749  if (checkAccelerateCondition(image) == MagickFalse)
4750    return((Image *) NULL);
4751
4752  clEnv=getOpenCLEnvironment(exception);
4753  if (clEnv == (MagickCLEnv) NULL)
4754    return((Image *) NULL);
4755
4756  filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4757  return filteredImage;
4758}
4759
4760/*
4761%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4762%                                                                             %
4763%                                                                             %
4764%                                                                             %
4765%     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
4766%                                                                             %
4767%                                                                             %
4768%                                                                             %
4769%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4770*/
4771
4772static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4773  const double radius,const double sigma,const double gain,
4774  const double threshold,ExceptionInfo *exception)
4775{
4776  cl_command_queue
4777    queue;
4778
4779  cl_int
4780    status;
4781
4782  cl_kernel
4783    blurRowKernel,
4784    unsharpMaskBlurColumnKernel;
4785
4786  cl_mem
4787    filteredImageBuffer,
4788    imageBuffer,
4789    imageKernelBuffer,
4790    tempImageBuffer;
4791
4792  cl_uint
4793    imageColumns,
4794    imageRows,
4795    kernelWidth,
4796    number_channels;
4797
4798  float
4799    fGain,
4800    fThreshold;
4801
4802  Image
4803    *filteredImage;
4804
4805  int
4806    chunkSize;
4807
4808  MagickBooleanType
4809    outputReady;
4810
4811  MagickCLDevice
4812    device;
4813
4814  MagickSizeType
4815    length;
4816
4817  size_t
4818    gsize[2],
4819    i,
4820    lsize[2];
4821
4822  filteredImage=NULL;
4823  tempImageBuffer=NULL;
4824  imageKernelBuffer=NULL;
4825  blurRowKernel=NULL;
4826  unsharpMaskBlurColumnKernel=NULL;
4827  outputReady=MagickFalse;
4828
4829  device=RequestOpenCLDevice(clEnv);
4830  queue=AcquireOpenCLCommandQueue(device);
4831  filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
4832    exception);
4833  if (filteredImage == (Image *) NULL)
4834    goto cleanup;
4835  if (filteredImage->number_channels != image->number_channels)
4836    goto cleanup;
4837  copyPixels(image,filteredImage,exception);
4838  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4839  if (imageBuffer == (cl_mem) NULL)
4840    goto cleanup;
4841  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4842  if (filteredImageBuffer == (cl_mem) NULL)
4843    goto cleanup;
4844
4845  imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4846    exception);
4847
4848  length=image->columns*image->rows;
4849  tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4850    sizeof(cl_float4),NULL);
4851  if (tempImageBuffer == (cl_mem) NULL)
4852  {
4853    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4854      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4855    goto cleanup;
4856  }
4857
4858  blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4859  if (blurRowKernel == (cl_kernel) NULL)
4860  {
4861    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4862      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4863    goto cleanup;
4864  }
4865
4866  unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4867    "UnsharpMaskBlurColumn");
4868  if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4869  {
4870    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4871      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4872    goto cleanup;
4873  }
4874
4875  number_channels=(cl_uint) image->number_channels;
4876  imageColumns=(cl_uint) image->columns;
4877  imageRows=(cl_uint) image->rows;
4878
4879  chunkSize = 256;
4880
4881  i=0;
4882  status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4883  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4884  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4885  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4886  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4887  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4888  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4889  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4890  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4891  if (status != CL_SUCCESS)
4892  {
4893    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4894      ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4895    goto cleanup;
4896  }
4897
4898  gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4899  gsize[1]=image->rows;
4900  lsize[0]=chunkSize;
4901  lsize[1]=1;
4902  outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4903    (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4904
4905  chunkSize=256;
4906  fGain=(float) gain;
4907  fThreshold=(float) threshold;
4908
4909  i=0;
4910  status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4911  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4912  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4913  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4914  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4915  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4916  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4917  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4918  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4919  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4920  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4921  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4922  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4923  if (status != CL_SUCCESS)
4924  {
4925    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4926      ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4927    goto cleanup;
4928  }
4929
4930  gsize[0]=image->columns;
4931  gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4932  lsize[0]=1;
4933  lsize[1]=chunkSize;
4934  outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4935    (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4936
4937cleanup:
4938
4939  if (tempImageBuffer != (cl_mem) NULL)
4940    ReleaseOpenCLMemObject(tempImageBuffer);
4941  if (imageKernelBuffer != (cl_mem) NULL)
4942    ReleaseOpenCLMemObject(imageKernelBuffer);
4943  if (blurRowKernel != (cl_kernel) NULL)
4944    ReleaseOpenCLKernel(blurRowKernel);
4945  if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4946    ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4947  if (queue != (cl_command_queue) NULL)
4948    ReleaseOpenCLCommandQueue(device,queue);
4949  if (device != (MagickCLDevice) NULL)
4950    ReleaseOpenCLDevice(device);
4951  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4952    filteredImage=DestroyImage(filteredImage);
4953
4954  return(filteredImage);
4955}
4956
4957static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4958  MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4959  const double threshold,ExceptionInfo *exception)
4960{
4961  cl_command_queue
4962    queue;
4963
4964  cl_int
4965    status;
4966
4967  cl_kernel
4968    unsharpMaskKernel;
4969
4970  cl_mem
4971    filteredImageBuffer,
4972    imageBuffer,
4973    imageKernelBuffer;
4974
4975  cl_uint
4976    imageColumns,
4977    imageRows,
4978    kernelWidth,
4979    number_channels;
4980
4981  float
4982    fGain,
4983    fThreshold;
4984
4985  Image
4986    *filteredImage;
4987
4988  MagickBooleanType
4989    outputReady;
4990
4991  MagickCLDevice
4992    device;
4993
4994  size_t
4995    gsize[2],
4996    i,
4997    lsize[2];
4998
4999  filteredImage=NULL;
5000  imageKernelBuffer=NULL;
5001  unsharpMaskKernel=NULL;
5002  outputReady=MagickFalse;
5003
5004  device=RequestOpenCLDevice(clEnv);
5005  queue=AcquireOpenCLCommandQueue(device);
5006  filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
5007    exception);
5008  if (filteredImage == (Image *) NULL)
5009    goto cleanup;
5010  if (filteredImage->number_channels != image->number_channels)
5011    goto cleanup;
5012  copyPixels(image,filteredImage,exception);
5013  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5014  if (imageBuffer == (cl_mem) NULL)
5015    goto cleanup;
5016  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5017  if (filteredImageBuffer == (cl_mem) NULL)
5018    goto cleanup;
5019
5020  imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
5021    exception);
5022
5023  unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
5024  if (unsharpMaskKernel == NULL)
5025  {
5026    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5027      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5028    goto cleanup;
5029  }
5030
5031  imageColumns=(cl_uint) image->columns;
5032  imageRows=(cl_uint) image->rows;
5033  number_channels=(cl_uint) image->number_channels;
5034  fGain=(float) gain;
5035  fThreshold=(float) threshold;
5036
5037  i=0;
5038  status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5039  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5040  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
5041  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
5042  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
5043  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
5044  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
5045  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
5046  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
5047  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
5048  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5049  if (status != CL_SUCCESS)
5050  {
5051    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5052      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5053    goto cleanup;
5054  }
5055
5056  gsize[0]=((image->columns + 7) / 8)*8;
5057  gsize[1]=((image->rows + 31) / 32)*32;
5058  lsize[0]=8;
5059  lsize[1]=32;
5060  outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
5061    gsize,lsize,image,filteredImage,exception);
5062
5063cleanup:
5064
5065  if (imageKernelBuffer != (cl_mem) NULL)
5066    ReleaseOpenCLMemObject(imageKernelBuffer);
5067  if (unsharpMaskKernel != (cl_kernel) NULL)
5068    ReleaseOpenCLKernel(unsharpMaskKernel);
5069  if (queue != (cl_command_queue) NULL)
5070    ReleaseOpenCLCommandQueue(device,queue);
5071  if (device != (MagickCLDevice) NULL)
5072    ReleaseOpenCLDevice(device);
5073  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5074    filteredImage=DestroyImage(filteredImage);
5075
5076  return(filteredImage);
5077}
5078
5079MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5080  const double radius,const double sigma,const double gain,
5081  const double threshold,ExceptionInfo *exception)
5082{
5083  Image
5084    *filteredImage;
5085
5086  MagickCLEnv
5087    clEnv;
5088
5089  assert(image != NULL);
5090  assert(exception != (ExceptionInfo *) NULL);
5091
5092  if (checkAccelerateCondition(image) == MagickFalse)
5093    return((Image *) NULL);
5094
5095  clEnv=getOpenCLEnvironment(exception);
5096  if (clEnv == (MagickCLEnv) NULL)
5097    return((Image *) NULL);
5098
5099  if (radius < 12.1)
5100    filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5101      threshold,exception);
5102  else
5103    filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5104      threshold,exception);
5105  return(filteredImage);
5106}
5107
5108static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5109  const double threshold,ExceptionInfo *exception)
5110{
5111  cl_command_queue
5112    queue;
5113
5114  const cl_int
5115    PASSES=5;
5116
5117  const int
5118    TILESIZE=64,
5119    PAD=1<<(PASSES-1),
5120    SIZE=TILESIZE-2*PAD;
5121
5122  cl_float
5123    thresh;
5124
5125  cl_int
5126    status;
5127
5128  cl_kernel
5129    denoiseKernel;
5130
5131  cl_mem
5132    filteredImageBuffer,
5133    imageBuffer;
5134
5135  cl_uint
5136    number_channels,
5137    width,
5138    height,
5139    max_channels;
5140
5141  Image
5142    *filteredImage;
5143
5144  MagickBooleanType
5145    outputReady;
5146
5147  MagickCLDevice
5148    device;
5149
5150  size_t
5151    gsize[2],
5152    i,
5153    lsize[2];
5154
5155  filteredImage=NULL;
5156  denoiseKernel=NULL;
5157  outputReady=MagickFalse;
5158
5159  device=RequestOpenCLDevice(clEnv);
5160  queue=AcquireOpenCLCommandQueue(device);
5161  filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
5162    exception);
5163  if (filteredImage == (Image *) NULL)
5164    goto cleanup;
5165  if (filteredImage->number_channels != image->number_channels)
5166    goto cleanup;
5167  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5168  if (imageBuffer == (cl_mem) NULL)
5169    goto cleanup;
5170  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5171  if (filteredImageBuffer == (cl_mem) NULL)
5172    goto cleanup;
5173
5174  denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5175  if (denoiseKernel == (cl_kernel) NULL)
5176  {
5177    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5178      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5179    goto cleanup;
5180  }
5181
5182  number_channels=(cl_uint)image->number_channels;
5183  width=(cl_uint)image->columns;
5184  height=(cl_uint)image->rows;
5185  max_channels=number_channels;
5186  if ((max_channels == 4) || (max_channels == 2))
5187    max_channels=max_channels-1;
5188  thresh=threshold;
5189
5190  i=0;
5191  status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5192  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5193  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5194  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5195  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5196  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5197  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5198  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5199  if (status != CL_SUCCESS)
5200  {
5201    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5202      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5203    goto cleanup;
5204  }
5205
5206  gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5207  gsize[1]=((height+(SIZE-1))/SIZE)*4;
5208  lsize[0]=TILESIZE;
5209  lsize[1]=4;
5210  outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,(const size_t *) NULL,
5211    gsize,lsize,image,filteredImage,exception);
5212
5213cleanup:
5214
5215  if (denoiseKernel != (cl_kernel) NULL)
5216    ReleaseOpenCLKernel(denoiseKernel);
5217  if (queue != (cl_command_queue) NULL)
5218    ReleaseOpenCLCommandQueue(device,queue);
5219  if (device != (MagickCLDevice) NULL)
5220    ReleaseOpenCLDevice(device);
5221  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5222    filteredImage=DestroyImage(filteredImage);
5223
5224  return(filteredImage);
5225}
5226
5227MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5228  const double threshold,ExceptionInfo *exception)
5229{
5230  Image
5231    *filteredImage;
5232
5233  MagickCLEnv
5234    clEnv;
5235
5236  assert(image != NULL);
5237  assert(exception != (ExceptionInfo *)NULL);
5238
5239  if (checkAccelerateCondition(image) == MagickFalse)
5240    return((Image *) NULL);
5241
5242  clEnv=getOpenCLEnvironment(exception);
5243  if (clEnv == (MagickCLEnv) NULL)
5244    return((Image *) NULL);
5245
5246  filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5247
5248  return(filteredImage);
5249}
5250#endif /* MAGICKCORE_OPENCL_SUPPORT */
5251