1/*
2%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3%                                                                             %
4%                                                                             %
5%                                                                             %
6%     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7%    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8%    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9%    A   A   C       C      E      L      E      R R    A   A    T    E       %
10%    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11%                                                                             %
12%                                                                             %
13%                       MagickCore Acceleration Methods                       %
14%                                                                             %
15%                              Software Design                                %
16%                                  Cristy                                     %
17%                               SiuChi Chan                                   %
18%                              Guansong Zhang                                 %
19%                               January 2010                                  %
20%                               Dirk Lemstra                                  %
21%                                April 2016                                   %
22%                                                                             %
23%                                                                             %
24%  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
25%  dedicated to making software imaging solutions freely available.           %
26%                                                                             %
27%  You may not use this file except in compliance with the License.  You may  %
28%  obtain a copy of the License at                                            %
29%                                                                             %
30%    http://www.imagemagick.org/script/license.php                            %
31%                                                                             %
32%  Unless required by applicable law or agreed to in writing, software        %
33%  distributed under the License is distributed on an "AS IS" BASIS,          %
34%  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
35%  See the License for the specific language governing permissions and        %
36%  limitations under the License.                                             %
37%                                                                             %
38%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39*/
40
41/*
42Include declarations.
43*/
44#include "MagickCore/studio.h"
45#include "MagickCore/accelerate-private.h"
46#include "MagickCore/accelerate-kernels-private.h"
47#include "MagickCore/artifact.h"
48#include "MagickCore/cache.h"
49#include "MagickCore/cache-private.h"
50#include "MagickCore/cache-view.h"
51#include "MagickCore/color-private.h"
52#include "MagickCore/delegate-private.h"
53#include "MagickCore/enhance.h"
54#include "MagickCore/exception.h"
55#include "MagickCore/exception-private.h"
56#include "MagickCore/gem.h"
57#include "MagickCore/image.h"
58#include "MagickCore/image-private.h"
59#include "MagickCore/linked-list.h"
60#include "MagickCore/list.h"
61#include "MagickCore/memory_.h"
62#include "MagickCore/monitor-private.h"
63#include "MagickCore/opencl.h"
64#include "MagickCore/opencl-private.h"
65#include "MagickCore/option.h"
66#include "MagickCore/pixel-accessor.h"
67#include "MagickCore/pixel-private.h"
68#include "MagickCore/prepress.h"
69#include "MagickCore/quantize.h"
70#include "MagickCore/quantum-private.h"
71#include "MagickCore/random_.h"
72#include "MagickCore/random-private.h"
73#include "MagickCore/registry.h"
74#include "MagickCore/resize.h"
75#include "MagickCore/resize-private.h"
76#include "MagickCore/semaphore.h"
77#include "MagickCore/splay-tree.h"
78#include "MagickCore/statistic.h"
79#include "MagickCore/string_.h"
80#include "MagickCore/string-private.h"
81#include "MagickCore/token.h"
82
83#define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84#define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85
86#if defined(MAGICKCORE_OPENCL_SUPPORT)
87
88/*
89  Define declarations.
90*/
91#define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92
93/*
94  Static declarations.
95*/
96static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97{
98  BoxWeightingFunction,
99  TriangleWeightingFunction,
100  HannWeightingFunction,
101  HammingWeightingFunction,
102  BlackmanWeightingFunction,
103  CubicBCWeightingFunction,
104  SincWeightingFunction,
105  SincFastWeightingFunction,
106  LastWeightingFunction
107};
108
109/*
110  Helper functions.
111*/
112static MagickBooleanType checkAccelerateCondition(const Image* image)
113{
114  /* check if the image's colorspace is supported */
115  if (image->colorspace != RGBColorspace &&
116      image->colorspace != sRGBColorspace &&
117      image->colorspace != GRAYColorspace)
118    return(MagickFalse);
119
120  /* check if the virtual pixel method is compatible with the OpenCL implementation */
121  if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
122      (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
123    return(MagickFalse);
124
125  /* check if the image has read / write mask */
126  if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
127    return(MagickFalse);
128
129  if (image->number_channels > 4)
130    return(MagickFalse);
131
132  /* check if pixel order is R */
133  if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
134    return(MagickFalse);
135
136  if (image->number_channels == 1)
137    return(MagickTrue);
138
139  /* check if pixel order is RA */
140  if ((image->number_channels == 2) &&
141      (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
142    return(MagickTrue);
143
144  if (image->number_channels == 2)
145    return(MagickFalse);
146
147  /* check if pixel order is RGB */
148  if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
149      (GetPixelChannelOffset(image,BluePixelChannel) != 2))
150    return(MagickFalse);
151
152  if (image->number_channels == 3)
153    return(MagickTrue);
154
155  /* check if pixel order is RGBA */
156  if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
157    return(MagickFalse);
158
159  return(MagickTrue);
160}
161
162static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
163{
164  if (checkAccelerateCondition(image) == MagickFalse)
165    return(MagickFalse);
166
167  /* the order will be RGBA if the image has 4 channels */
168  if (image->number_channels != 4)
169    return(MagickFalse);
170
171  if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
172      (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
173      (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
174      (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
175    return(MagickFalse);
176
177  return(MagickTrue);
178}
179
180static MagickBooleanType checkPixelIntensity(const Image *image,
181  const PixelIntensityMethod method)
182{
183  /* EncodePixelGamma and DecodePixelGamma are not supported */
184  if ((method == Rec601LumaPixelIntensityMethod) ||
185      (method == Rec709LumaPixelIntensityMethod))
186    {
187      if (image->colorspace == RGBColorspace)
188        return(MagickFalse);
189    }
190
191  if ((method == Rec601LuminancePixelIntensityMethod) ||
192      (method == Rec709LuminancePixelIntensityMethod))
193    {
194      if (image->colorspace == sRGBColorspace)
195        return(MagickFalse);
196    }
197
198  return(MagickTrue);
199}
200
201static MagickBooleanType checkHistogramCondition(const Image *image,
202  const PixelIntensityMethod method)
203{
204  /* ensure this is the only pass get in for now. */
205  if ((image->channel_mask & SyncChannels) == 0)
206    return MagickFalse;
207
208  return(checkPixelIntensity(image,method));
209}
210
211static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
212{
213  MagickCLEnv
214    clEnv;
215
216  clEnv=GetCurrentOpenCLEnv();
217  if (clEnv == (MagickCLEnv) NULL)
218    return((MagickCLEnv) NULL);
219
220  if (clEnv->enabled == MagickFalse)
221    return((MagickCLEnv) NULL);
222
223  if (InitializeOpenCL(clEnv,exception) == MagickFalse)
224    return((MagickCLEnv) NULL);
225
226  return(clEnv);
227}
228
229static Image *cloneImage(const Image* image,ExceptionInfo *exception)
230{
231  Image
232    *clone;
233
234  if (((image->channel_mask & RedChannel) != 0) &&
235      ((image->channel_mask & GreenChannel) != 0) &&
236      ((image->channel_mask & BlueChannel) != 0) &&
237      ((image->channel_mask & AlphaChannel) != 0))
238    clone=CloneImage(image,image->columns,image->rows,MagickTrue,exception);
239  else
240    {
241      clone=CloneImage(image,0,0,MagickTrue,exception);
242      if (clone != (Image *) NULL)
243        SyncImagePixelCache(clone,exception);
244    }
245  return(clone);
246}
247
248/* pad the global workgroup size to the next multiple of
249   the local workgroup size */
250inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
251  const unsigned int orgGlobalSize,const unsigned int localGroupSize)
252{
253  return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
254}
255
256static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
257  const double sigma,cl_uint *width,ExceptionInfo *exception)
258{
259  char
260    geometry[MagickPathExtent];
261
262  cl_int
263    status;
264
265  cl_mem
266    imageKernelBuffer;
267
268  float
269    *kernelBufferPtr;
270
271  KernelInfo
272    *kernel;
273
274  ssize_t
275    i;
276
277  (void) FormatLocaleString(geometry,MagickPathExtent,
278    "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
279  kernel=AcquireKernelInfo(geometry,exception);
280  if (kernel == (KernelInfo *) NULL)
281  {
282    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
283      ResourceLimitWarning,"AcquireKernelInfo failed.",".");
284    return((cl_mem) NULL);
285  }
286  kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
287    sizeof(*kernelBufferPtr));
288  for (i = 0; i < (ssize_t) kernel->width; i++)
289    kernelBufferPtr[i] = (float)kernel->values[i];
290  imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
291    CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
292  *width=kernel->width;
293  kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
294  kernel=DestroyKernelInfo(kernel);
295  if (imageKernelBuffer == (cl_mem) NULL)
296    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
297      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
298  return(imageKernelBuffer);
299}
300
301static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
302  MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
303  cl_mem histogramBuffer,Image *image,const ChannelType channel,
304  ExceptionInfo *exception)
305{
306  MagickBooleanType
307    outputReady;
308
309  cl_int
310    clStatus;
311
312  cl_kernel
313    histogramKernel;
314
315  cl_event
316    event;
317
318  cl_uint
319    colorspace,
320    method;
321
322  register ssize_t
323    i;
324
325  size_t
326    global_work_size[2];
327
328  histogramKernel = NULL;
329
330  outputReady = MagickFalse;
331  colorspace = image->colorspace;
332  method = image->intensity;
333
334  /* get the OpenCL kernel */
335  histogramKernel = AcquireOpenCLKernel(device,"Histogram");
336  if (histogramKernel == NULL)
337  {
338    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
339    goto cleanup;
340  }
341
342  /* set the kernel arguments */
343  i = 0;
344  clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
345  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
346  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
347  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
348  clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
349  if (clStatus != CL_SUCCESS)
350  {
351    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
352    goto cleanup;
353  }
354
355  /* launch the kernel */
356  global_work_size[0] = image->columns;
357  global_work_size[1] = image->rows;
358
359  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
360
361  if (clStatus != CL_SUCCESS)
362  {
363    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
364    goto cleanup;
365  }
366  RecordProfileData(device,histogramKernel,event);
367
368  outputReady = MagickTrue;
369
370cleanup:
371
372  if (histogramKernel!=NULL)
373    ReleaseOpenCLKernel(histogramKernel);
374
375  return(outputReady);
376}
377
378/*
379%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
380%                                                                             %
381%                                                                             %
382%                                                                             %
383%     A c c e l e r a t e A d d N o i s e I m a g e                           %
384%                                                                             %
385%                                                                             %
386%                                                                             %
387%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
388*/
389
390static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
391  const NoiseType noise_type,ExceptionInfo *exception)
392{
393  cl_command_queue
394    queue;
395
396  cl_float
397    attenuate;
398
399  cl_int
400    status;
401
402  cl_kernel
403    addNoiseKernel;
404
405  cl_mem
406    filteredImageBuffer,
407    imageBuffer;
408
409  cl_uint
410    bufferLength,
411    inputPixelCount,
412    number_channels,
413    numRandomNumberPerPixel,
414    pixelsPerWorkitem,
415    seed0,
416    seed1,
417    workItemCount;
418
419  const char
420    *option;
421
422  const unsigned long
423    *s;
424
425  MagickBooleanType
426    outputReady;
427
428  MagickCLDevice
429    device;
430
431  Image
432    *filteredImage;
433
434  RandomInfo
435    *randomInfo;
436
437  size_t
438    gsize[1],
439    i,
440    lsize[1],
441    numRandPerChannel;
442
443  filteredImage=NULL;
444  addNoiseKernel=NULL;
445  outputReady=MagickFalse;
446
447  device=RequestOpenCLDevice(clEnv);
448  queue=AcquireOpenCLCommandQueue(device);
449  if (queue == (cl_command_queue) NULL)
450    goto cleanup;
451  filteredImage=cloneImage(image,exception);
452  if (filteredImage == (Image *) NULL)
453    goto cleanup;
454  if (filteredImage->number_channels != image->number_channels)
455    goto cleanup;
456  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
457  if (imageBuffer == (cl_mem) NULL)
458    goto cleanup;
459  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
460  if (filteredImageBuffer == (cl_mem) NULL)
461    goto cleanup;
462
463  /* find out how many random numbers needed by pixel */
464  numRandPerChannel=0;
465  numRandomNumberPerPixel=0;
466  switch (noise_type)
467  {
468    case UniformNoise:
469    case ImpulseNoise:
470    case LaplacianNoise:
471    case RandomNoise:
472    default:
473      numRandPerChannel=1;
474      break;
475    case GaussianNoise:
476    case MultiplicativeGaussianNoise:
477    case PoissonNoise:
478      numRandPerChannel=2;
479      break;
480  };
481  if (GetPixelRedTraits(image) != UndefinedPixelTrait)
482    numRandomNumberPerPixel+=numRandPerChannel;
483  if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
484    numRandomNumberPerPixel+=numRandPerChannel;
485  if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
486    numRandomNumberPerPixel+=numRandPerChannel;
487  if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
488    numRandomNumberPerPixel+=numRandPerChannel;
489
490  addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
491  if (addNoiseKernel == (cl_kernel) NULL)
492  {
493    (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
494      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
495    goto cleanup;
496  }
497
498  /* 256 work items per group, 2 groups per CU */
499  workItemCount=device->max_compute_units*2*256;
500  inputPixelCount=(cl_int) (image->columns*image->rows);
501  pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
502  pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
503  lsize[0]=256;
504  gsize[0]=workItemCount;
505
506  randomInfo=AcquireRandomInfo();
507  s=GetRandomInfoSeed(randomInfo);
508  seed0=s[0];
509  (void) GetPseudoRandomValue(randomInfo);
510  seed1=s[0];
511  randomInfo=DestroyRandomInfo(randomInfo);
512
513  number_channels=(cl_uint) image->number_channels;
514  bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
515  attenuate=1.0f;
516  option=GetImageArtifact(image,"attenuate");
517  if (option != (char *) NULL)
518    attenuate=(float)StringToDouble(option,(char **) NULL);
519
520  i=0;
521  status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
522  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
523  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
524  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
525  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
526  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
527  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&attenuate);
528  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
529  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
530  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
531  status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
532  if (status != CL_SUCCESS)
533  {
534    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
535      ResourceLimitWarning,"clSetKernelArg failed.",".");
536    goto cleanup;
537  }
538
539  outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
540    lsize,image,filteredImage,exception);
541
542cleanup:
543
544  if (addNoiseKernel != (cl_kernel) NULL)
545    ReleaseOpenCLKernel(addNoiseKernel);
546  if (queue != (cl_command_queue) NULL)
547    ReleaseOpenCLCommandQueue(device,queue);
548  if (device != (MagickCLDevice) NULL)
549    ReleaseOpenCLDevice(device);
550  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
551    filteredImage=DestroyImage(filteredImage);
552
553  return(filteredImage);
554}
555
556MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
557  const NoiseType noise_type,ExceptionInfo *exception)
558{
559  Image
560    *filteredImage;
561
562  MagickCLEnv
563    clEnv;
564
565  assert(image != NULL);
566  assert(exception != (ExceptionInfo *) NULL);
567
568  if (checkAccelerateCondition(image) == MagickFalse)
569    return((Image *) NULL);
570
571  clEnv=getOpenCLEnvironment(exception);
572  if (clEnv == (MagickCLEnv) NULL)
573    return((Image *) NULL);
574
575  filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,exception);
576  return(filteredImage);
577}
578
579/*
580%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
581%                                                                             %
582%                                                                             %
583%                                                                             %
584%     A c c e l e r a t e B l u r I m a g e                                   %
585%                                                                             %
586%                                                                             %
587%                                                                             %
588%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
589*/
590
591static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
592  const double radius,const double sigma,ExceptionInfo *exception)
593{
594  cl_command_queue
595    queue;
596
597  cl_int
598    status;
599
600  cl_kernel
601    blurColumnKernel,
602    blurRowKernel;
603
604  cl_mem
605    filteredImageBuffer,
606    imageBuffer,
607    imageKernelBuffer,
608    tempImageBuffer;
609
610  cl_uint
611    imageColumns,
612    imageRows,
613    kernelWidth,
614    number_channels;
615
616  Image
617    *filteredImage;
618
619  MagickBooleanType
620    outputReady;
621
622  MagickCLDevice
623    device;
624
625  MagickSizeType
626    length;
627
628  size_t
629    chunkSize=256,
630    gsize[2],
631    i,
632    lsize[2];
633
634  filteredImage=NULL;
635  tempImageBuffer=NULL;
636  imageKernelBuffer=NULL;
637  blurRowKernel=NULL;
638  blurColumnKernel=NULL;
639  outputReady=MagickFalse;
640
641  device=RequestOpenCLDevice(clEnv);
642  queue=AcquireOpenCLCommandQueue(device);
643  filteredImage=cloneImage(image,exception);
644  if (filteredImage == (Image *) NULL)
645    goto cleanup;
646  if (filteredImage->number_channels != image->number_channels)
647    goto cleanup;
648  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
649  if (imageBuffer == (cl_mem) NULL)
650    goto cleanup;
651  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
652  if (filteredImageBuffer == (cl_mem) NULL)
653    goto cleanup;
654
655  imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
656    exception);
657  if (imageKernelBuffer == (cl_mem) NULL)
658    goto cleanup;
659
660  length=image->columns*image->rows;
661  tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
662    sizeof(cl_float4),(void *) NULL);
663  if (tempImageBuffer == (cl_mem) NULL)
664    goto cleanup;
665
666  blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
667  if (blurRowKernel == (cl_kernel) NULL)
668  {
669    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
670      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
671    goto cleanup;
672  }
673
674  number_channels=(cl_uint) image->number_channels;
675  imageColumns=(cl_uint) image->columns;
676  imageRows=(cl_uint) image->rows;
677
678  i=0;
679  status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
680  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
681  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
682  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
683  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
684  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
685  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
686  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
687  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
688  if (status != CL_SUCCESS)
689  {
690    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
691      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
692    goto cleanup;
693  }
694
695  gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
696  gsize[1]=image->rows;
697  lsize[0]=chunkSize;
698  lsize[1]=1;
699
700  outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
701    lsize,image,filteredImage,exception);
702  if (outputReady == MagickFalse)
703    goto cleanup;
704
705  blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
706  if (blurColumnKernel == (cl_kernel) NULL)
707  {
708    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
709      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
710    goto cleanup;
711  }
712
713  i=0;
714  status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
715  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
716  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
717  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
718  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
719  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
720  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
721  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
722  status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
723  if (status != CL_SUCCESS)
724  {
725    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
726      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
727    goto cleanup;
728  }
729
730  gsize[0]=image->columns;
731  gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
732  lsize[0]=1;
733  lsize[1]=chunkSize;
734
735  outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
736    lsize,image,filteredImage,exception);
737
738cleanup:
739
740  if (tempImageBuffer != (cl_mem) NULL)
741    ReleaseOpenCLMemObject(tempImageBuffer);
742  if (imageKernelBuffer != (cl_mem) NULL)
743    ReleaseOpenCLMemObject(imageKernelBuffer);
744  if (blurRowKernel != (cl_kernel) NULL)
745    ReleaseOpenCLKernel(blurRowKernel);
746  if (blurColumnKernel != (cl_kernel) NULL)
747    ReleaseOpenCLKernel(blurColumnKernel);
748  if (queue != (cl_command_queue) NULL)
749    ReleaseOpenCLCommandQueue(device,queue);
750  if (device != (MagickCLDevice) NULL)
751    ReleaseOpenCLDevice(device);
752  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
753    filteredImage=DestroyImage(filteredImage);
754
755  return(filteredImage);
756}
757
758MagickPrivate Image* AccelerateBlurImage(const Image *image,
759  const double radius,const double sigma,ExceptionInfo *exception)
760{
761  Image
762    *filteredImage;
763
764  MagickCLEnv
765    clEnv;
766
767  assert(image != NULL);
768  assert(exception != (ExceptionInfo *) NULL);
769
770  if (checkAccelerateCondition(image) == MagickFalse)
771    return((Image *) NULL);
772
773  clEnv=getOpenCLEnvironment(exception);
774  if (clEnv == (MagickCLEnv) NULL)
775    return((Image *) NULL);
776
777  filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
778  return(filteredImage);
779}
780
781/*
782%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
783%                                                                             %
784%                                                                             %
785%                                                                             %
786%     A c c e l e r a t e C o n t r a s t I m a g e                           %
787%                                                                             %
788%                                                                             %
789%                                                                             %
790%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
791*/
792
793static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
794  const MagickBooleanType sharpen,ExceptionInfo *exception)
795{
796  CacheView
797    *image_view;
798
799  cl_command_queue
800    queue;
801
802  cl_int
803    clStatus;
804
805  cl_kernel
806    filterKernel;
807
808  cl_event
809    event;
810
811  cl_mem
812    imageBuffer;
813
814  cl_mem_flags
815    mem_flags;
816
817  MagickBooleanType
818    outputReady;
819
820  MagickCLDevice
821    device;
822
823  MagickSizeType
824    length;
825
826  size_t
827    global_work_size[2];
828
829  unsigned int
830    i,
831    uSharpen;
832
833  void
834    *inputPixels;
835
836  outputReady = MagickFalse;
837  inputPixels = NULL;
838  imageBuffer = NULL;
839  filterKernel = NULL;
840  queue = NULL;
841
842  device = RequestOpenCLDevice(clEnv);
843
844  /* Create and initialize OpenCL buffers. */
845  image_view=AcquireAuthenticCacheView(image,exception);
846  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
847  if (inputPixels == (void *) NULL)
848  {
849    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
850    goto cleanup;
851  }
852
853  /* If the host pointer is aligned to the size of CLPixelPacket,
854     then use the host buffer directly from the GPU; otherwise,
855     create a buffer on the GPU and copy the data over */
856  if (ALIGNED(inputPixels,CLPixelPacket))
857  {
858    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
859  }
860  else
861  {
862    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
863  }
864  /* create a CL buffer from image pixel buffer */
865  length = image->columns * image->rows;
866  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
867  if (clStatus != CL_SUCCESS)
868  {
869    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
870    goto cleanup;
871  }
872
873  filterKernel = AcquireOpenCLKernel(device,"Contrast");
874  if (filterKernel == NULL)
875  {
876    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
877    goto cleanup;
878  }
879
880  i = 0;
881  clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
882
883  uSharpen = (sharpen == MagickFalse)?0:1;
884  clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
885  if (clStatus != CL_SUCCESS)
886  {
887    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
888    goto cleanup;
889  }
890
891  global_work_size[0] = image->columns;
892  global_work_size[1] = image->rows;
893  /* launch the kernel */
894  queue = AcquireOpenCLCommandQueue(device);
895  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
896  if (clStatus != CL_SUCCESS)
897  {
898    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
899    goto cleanup;
900  }
901  RecordProfileData(device,filterKernel,event);
902
903  if (ALIGNED(inputPixels,CLPixelPacket))
904  {
905    length = image->columns * image->rows;
906    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
907  }
908  else
909  {
910    length = image->columns * image->rows;
911    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
912  }
913  if (clStatus != CL_SUCCESS)
914  {
915    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
916    goto cleanup;
917  }
918  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
919
920cleanup:
921
922  image_view=DestroyCacheView(image_view);
923
924  if (imageBuffer!=NULL)
925    clEnv->library->clReleaseMemObject(imageBuffer);
926  if (filterKernel!=NULL)
927    ReleaseOpenCLKernel(filterKernel);
928  if (queue != NULL)
929    ReleaseOpenCLCommandQueue(device,queue);
930  if (device != NULL)
931    ReleaseOpenCLDevice(device);
932
933  return(outputReady);
934}
935
936MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
937  const MagickBooleanType sharpen,ExceptionInfo *exception)
938{
939  MagickBooleanType
940    status;
941
942  MagickCLEnv
943    clEnv;
944
945  assert(image != NULL);
946  assert(exception != (ExceptionInfo *) NULL);
947
948  if (checkAccelerateConditionRGBA(image) == MagickFalse)
949    return(MagickFalse);
950
951  clEnv=getOpenCLEnvironment(exception);
952  if (clEnv == (MagickCLEnv) NULL)
953    return(MagickFalse);
954
955  status=ComputeContrastImage(image,clEnv,sharpen,exception);
956  return(status);
957}
958
959/*
960%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
961%                                                                             %
962%                                                                             %
963%                                                                             %
964%     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
965%                                                                             %
966%                                                                             %
967%                                                                             %
968%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
969*/
970
971static MagickBooleanType ComputeContrastStretchImage(Image *image,
972  MagickCLEnv clEnv,const double black_point,const double white_point,
973  ExceptionInfo *exception)
974{
975#define ContrastStretchImageTag  "ContrastStretch/Image"
976#define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
977
978  CacheView
979    *image_view;
980
981  cl_command_queue
982    queue;
983
984  cl_int
985    clStatus;
986
987  cl_mem_flags
988    mem_flags;
989
990  cl_mem
991    histogramBuffer,
992    imageBuffer,
993    stretchMapBuffer;
994
995  cl_kernel
996    histogramKernel,
997    stretchKernel;
998
999  cl_event
1000    event;
1001
1002  cl_uint4
1003    *histogram;
1004
1005  double
1006    intensity;
1007
1008  FloatPixelPacket
1009    black,
1010    white;
1011
1012  MagickBooleanType
1013    outputReady,
1014    status;
1015
1016  MagickCLDevice
1017    device;
1018
1019  MagickSizeType
1020    length;
1021
1022  PixelPacket
1023    *stretch_map;
1024
1025  register ssize_t
1026    i;
1027
1028  size_t
1029    global_work_size[2];
1030
1031  void
1032    *hostPtr,
1033    *inputPixels;
1034
1035  histogram=NULL;
1036  stretch_map=NULL;
1037  inputPixels = NULL;
1038  imageBuffer = NULL;
1039  histogramBuffer = NULL;
1040  stretchMapBuffer = NULL;
1041  histogramKernel = NULL;
1042  stretchKernel = NULL;
1043  queue = NULL;
1044  outputReady = MagickFalse;
1045
1046
1047  assert(image != (Image *) NULL);
1048  assert(image->signature == MagickCoreSignature);
1049  if (image->debug != MagickFalse)
1050    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1051
1052  //exception=(&image->exception);
1053
1054  /*
1055   * initialize opencl env
1056   */
1057  device = RequestOpenCLDevice(clEnv);
1058  queue = AcquireOpenCLCommandQueue(device);
1059
1060  /*
1061    Allocate and initialize histogram arrays.
1062  */
1063  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1064
1065  if (histogram == (cl_uint4 *) NULL)
1066    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1067
1068  /* reset histogram */
1069  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
1070
1071  /*
1072  if (IsGrayImage(image,exception) != MagickFalse)
1073    (void) SetImageColorspace(image,GRAYColorspace);
1074  */
1075
1076  status=MagickTrue;
1077
1078
1079  /*
1080    Form histogram.
1081  */
1082  /* Create and initialize OpenCL buffers. */
1083  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1084  /* assume this  will get a writable image */
1085  image_view=AcquireAuthenticCacheView(image,exception);
1086  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1087
1088  if (inputPixels == (void *) NULL)
1089  {
1090    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1091    goto cleanup;
1092  }
1093  /* If the host pointer is aligned to the size of CLPixelPacket,
1094     then use the host buffer directly from the GPU; otherwise,
1095     create a buffer on the GPU and copy the data over */
1096  if (ALIGNED(inputPixels,CLPixelPacket))
1097  {
1098    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1099  }
1100  else
1101  {
1102    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1103  }
1104  /* create a CL buffer from image pixel buffer */
1105  length = image->columns * image->rows;
1106  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1107  if (clStatus != CL_SUCCESS)
1108  {
1109    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1110    goto cleanup;
1111  }
1112
1113  /* If the host pointer is aligned to the size of cl_uint,
1114     then use the host buffer directly from the GPU; otherwise,
1115     create a buffer on the GPU and copy the data over */
1116  if (ALIGNED(histogram,cl_uint4))
1117  {
1118    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1119    hostPtr = histogram;
1120  }
1121  else
1122  {
1123    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1124    hostPtr = histogram;
1125  }
1126  /* create a CL buffer for histogram  */
1127  length = (MaxMap+1);
1128  histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1129  if (clStatus != CL_SUCCESS)
1130  {
1131    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1132    goto cleanup;
1133  }
1134
1135  status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1136  if (status == MagickFalse)
1137    goto cleanup;
1138
1139  /* read from the kenel output */
1140  if (ALIGNED(histogram,cl_uint4))
1141  {
1142    length = (MaxMap+1);
1143    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1144  }
1145  else
1146  {
1147    length = (MaxMap+1);
1148    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1149  }
1150  if (clStatus != CL_SUCCESS)
1151  {
1152    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1153    goto cleanup;
1154  }
1155
1156  /* unmap, don't block gpu to use this buffer again.  */
1157  if (ALIGNED(histogram,cl_uint4))
1158  {
1159    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1160    if (clStatus != CL_SUCCESS)
1161    {
1162      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1163      goto cleanup;
1164    }
1165  }
1166
1167  /* recreate input buffer later, in case image updated */
1168#ifdef RECREATEBUFFER
1169  if (imageBuffer!=NULL)
1170    clEnv->library->clReleaseMemObject(imageBuffer);
1171#endif
1172
1173  /* CPU stuff */
1174  /*
1175     Find the histogram boundaries by locating the black/white levels.
1176  */
1177  black.red=0.0;
1178  white.red=MaxRange(QuantumRange);
1179  if ((image->channel_mask & RedChannel) != 0)
1180  {
1181    intensity=0.0;
1182    for (i=0; i <= (ssize_t) MaxMap; i++)
1183    {
1184      intensity+=histogram[i].s[2];
1185      if (intensity > black_point)
1186        break;
1187    }
1188    black.red=(MagickRealType) i;
1189    intensity=0.0;
1190    for (i=(ssize_t) MaxMap; i != 0; i--)
1191    {
1192      intensity+=histogram[i].s[2];
1193      if (intensity > ((double) image->columns*image->rows-white_point))
1194        break;
1195    }
1196    white.red=(MagickRealType) i;
1197  }
1198  black.green=0.0;
1199  white.green=MaxRange(QuantumRange);
1200  if ((image->channel_mask & GreenChannel) != 0)
1201  {
1202    intensity=0.0;
1203    for (i=0; i <= (ssize_t) MaxMap; i++)
1204    {
1205      intensity+=histogram[i].s[2];
1206      if (intensity > black_point)
1207        break;
1208    }
1209    black.green=(MagickRealType) i;
1210    intensity=0.0;
1211    for (i=(ssize_t) MaxMap; i != 0; i--)
1212    {
1213      intensity+=histogram[i].s[2];
1214      if (intensity > ((double) image->columns*image->rows-white_point))
1215        break;
1216    }
1217    white.green=(MagickRealType) i;
1218  }
1219  black.blue=0.0;
1220  white.blue=MaxRange(QuantumRange);
1221  if ((image->channel_mask & BlueChannel) != 0)
1222  {
1223    intensity=0.0;
1224    for (i=0; i <= (ssize_t) MaxMap; i++)
1225    {
1226      intensity+=histogram[i].s[2];
1227      if (intensity > black_point)
1228        break;
1229    }
1230    black.blue=(MagickRealType) i;
1231    intensity=0.0;
1232    for (i=(ssize_t) MaxMap; i != 0; i--)
1233    {
1234      intensity+=histogram[i].s[2];
1235      if (intensity > ((double) image->columns*image->rows-white_point))
1236        break;
1237    }
1238    white.blue=(MagickRealType) i;
1239  }
1240  black.alpha=0.0;
1241  white.alpha=MaxRange(QuantumRange);
1242  if ((image->channel_mask & AlphaChannel) != 0)
1243  {
1244    intensity=0.0;
1245    for (i=0; i <= (ssize_t) MaxMap; i++)
1246    {
1247      intensity+=histogram[i].s[2];
1248      if (intensity > black_point)
1249        break;
1250    }
1251    black.alpha=(MagickRealType) i;
1252    intensity=0.0;
1253    for (i=(ssize_t) MaxMap; i != 0; i--)
1254    {
1255      intensity+=histogram[i].s[2];
1256      if (intensity > ((double) image->columns*image->rows-white_point))
1257        break;
1258    }
1259    white.alpha=(MagickRealType) i;
1260  }
1261  /*
1262  black.index=0.0;
1263  white.index=MaxRange(QuantumRange);
1264  if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
1265  {
1266    intensity=0.0;
1267    for (i=0; i <= (ssize_t) MaxMap; i++)
1268    {
1269      intensity+=histogram[i].index;
1270      if (intensity > black_point)
1271        break;
1272    }
1273    black.index=(MagickRealType) i;
1274    intensity=0.0;
1275    for (i=(ssize_t) MaxMap; i != 0; i--)
1276    {
1277      intensity+=histogram[i].index;
1278      if (intensity > ((double) image->columns*image->rows-white_point))
1279        break;
1280    }
1281    white.index=(MagickRealType) i;
1282  }
1283  */
1284
1285
1286  stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1287    sizeof(*stretch_map));
1288
1289  if (stretch_map == (PixelPacket *) NULL)
1290    ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1291      image->filename);
1292
1293  /*
1294    Stretch the histogram to create the stretched image mapping.
1295  */
1296  (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1297  for (i=0; i <= (ssize_t) MaxMap; i++)
1298  {
1299    if ((image->channel_mask & RedChannel) != 0)
1300    {
1301      if (i < (ssize_t) black.red)
1302        stretch_map[i].red=(Quantum) 0;
1303      else
1304        if (i > (ssize_t) white.red)
1305          stretch_map[i].red=QuantumRange;
1306        else
1307          if (black.red != white.red)
1308            stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1309                  (i-black.red)/(white.red-black.red)));
1310    }
1311    if ((image->channel_mask & GreenChannel) != 0)
1312    {
1313      if (i < (ssize_t) black.green)
1314        stretch_map[i].green=0;
1315      else
1316        if (i > (ssize_t) white.green)
1317          stretch_map[i].green=QuantumRange;
1318        else
1319          if (black.green != white.green)
1320            stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1321                  (i-black.green)/(white.green-black.green)));
1322    }
1323    if ((image->channel_mask & BlueChannel) != 0)
1324    {
1325      if (i < (ssize_t) black.blue)
1326        stretch_map[i].blue=0;
1327      else
1328        if (i > (ssize_t) white.blue)
1329          stretch_map[i].blue= QuantumRange;
1330        else
1331          if (black.blue != white.blue)
1332            stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1333                  (i-black.blue)/(white.blue-black.blue)));
1334    }
1335    if ((image->channel_mask & AlphaChannel) != 0)
1336    {
1337      if (i < (ssize_t) black.alpha)
1338        stretch_map[i].alpha=0;
1339      else
1340        if (i > (ssize_t) white.alpha)
1341          stretch_map[i].alpha=QuantumRange;
1342        else
1343          if (black.alpha != white.alpha)
1344            stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1345                  (i-black.alpha)/(white.alpha-black.alpha)));
1346    }
1347    /*
1348    if (((channel & IndexChannel) != 0) &&
1349        (image->colorspace == CMYKColorspace))
1350    {
1351      if (i < (ssize_t) black.index)
1352        stretch_map[i].index=0;
1353      else
1354        if (i > (ssize_t) white.index)
1355          stretch_map[i].index=QuantumRange;
1356        else
1357          if (black.index != white.index)
1358            stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
1359                  (i-black.index)/(white.index-black.index)));
1360    }
1361    */
1362  }
1363
1364  /*
1365    Stretch the image.
1366  */
1367  if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1368      (image->colorspace == CMYKColorspace)))
1369    image->storage_class=DirectClass;
1370  if (image->storage_class == PseudoClass)
1371  {
1372    /*
1373       Stretch colormap.
1374       */
1375    for (i=0; i < (ssize_t) image->colors; i++)
1376    {
1377      if ((image->channel_mask & RedChannel) != 0)
1378      {
1379        if (black.red != white.red)
1380          image->colormap[i].red=stretch_map[
1381            ScaleQuantumToMap(image->colormap[i].red)].red;
1382      }
1383      if ((image->channel_mask & GreenChannel) != 0)
1384      {
1385        if (black.green != white.green)
1386          image->colormap[i].green=stretch_map[
1387            ScaleQuantumToMap(image->colormap[i].green)].green;
1388      }
1389      if ((image->channel_mask & BlueChannel) != 0)
1390      {
1391        if (black.blue != white.blue)
1392          image->colormap[i].blue=stretch_map[
1393            ScaleQuantumToMap(image->colormap[i].blue)].blue;
1394      }
1395      if ((image->channel_mask & AlphaChannel) != 0)
1396      {
1397        if (black.alpha != white.alpha)
1398          image->colormap[i].alpha=stretch_map[
1399            ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1400      }
1401    }
1402  }
1403
1404  /*
1405    Stretch image.
1406  */
1407
1408
1409  /* GPU can work on this again, image and equalize map as input
1410    image:        uchar4 (CLPixelPacket)
1411    stretch_map:  uchar4 (PixelPacket)
1412    black, white: float4 (FloatPixelPacket) */
1413
1414#ifdef RECREATEBUFFER
1415  /* If the host pointer is aligned to the size of CLPixelPacket,
1416     then use the host buffer directly from the GPU; otherwise,
1417     create a buffer on the GPU and copy the data over */
1418  if (ALIGNED(inputPixels,CLPixelPacket))
1419  {
1420    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1421  }
1422  else
1423  {
1424    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1425  }
1426  /* create a CL buffer from image pixel buffer */
1427  length = image->columns * image->rows;
1428  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1429  if (clStatus != CL_SUCCESS)
1430  {
1431    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1432    goto cleanup;
1433  }
1434#endif
1435
1436  /* Create and initialize OpenCL buffers. */
1437  if (ALIGNED(stretch_map, PixelPacket))
1438  {
1439    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1440    hostPtr = stretch_map;
1441  }
1442  else
1443  {
1444    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1445    hostPtr = stretch_map;
1446  }
1447  /* create a CL buffer for stretch_map  */
1448  length = (MaxMap+1);
1449  stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1450  if (clStatus != CL_SUCCESS)
1451  {
1452    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1453    goto cleanup;
1454  }
1455
1456  /* get the OpenCL kernel */
1457  stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1458  if (stretchKernel == NULL)
1459  {
1460    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1461    goto cleanup;
1462  }
1463
1464  /* set the kernel arguments */
1465  i = 0;
1466  clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1467  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1468  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1469  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
1470  clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
1471  if (clStatus != CL_SUCCESS)
1472  {
1473    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1474    goto cleanup;
1475  }
1476
1477  /* launch the kernel */
1478  global_work_size[0] = image->columns;
1479  global_work_size[1] = image->rows;
1480
1481  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1482
1483  if (clStatus != CL_SUCCESS)
1484  {
1485    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1486    goto cleanup;
1487  }
1488  RecordProfileData(device,stretchKernel,event);
1489
1490  /* read the data back */
1491  if (ALIGNED(inputPixels,CLPixelPacket))
1492  {
1493    length = image->columns * image->rows;
1494    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1495  }
1496  else
1497  {
1498    length = image->columns * image->rows;
1499    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1500  }
1501  if (clStatus != CL_SUCCESS)
1502  {
1503    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1504    goto cleanup;
1505  }
1506
1507  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1508
1509cleanup:
1510
1511  image_view=DestroyCacheView(image_view);
1512
1513  if (imageBuffer!=NULL)
1514    clEnv->library->clReleaseMemObject(imageBuffer);
1515
1516  if (stretchMapBuffer!=NULL)
1517    clEnv->library->clReleaseMemObject(stretchMapBuffer);
1518  if (stretch_map!=NULL)
1519    stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1520  if (histogramBuffer!=NULL)
1521    clEnv->library->clReleaseMemObject(histogramBuffer);
1522  if (histogram!=NULL)
1523    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1524  if (histogramKernel!=NULL)
1525    ReleaseOpenCLKernel(histogramKernel);
1526  if (stretchKernel!=NULL)
1527    ReleaseOpenCLKernel(stretchKernel);
1528  if (queue != NULL)
1529    ReleaseOpenCLCommandQueue(device,queue);
1530  if (device != NULL)
1531    ReleaseOpenCLDevice(device);
1532
1533  return(outputReady);
1534}
1535
1536MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1537  Image *image,const double black_point,const double white_point,
1538  ExceptionInfo *exception)
1539{
1540  MagickBooleanType
1541    status;
1542
1543  MagickCLEnv
1544    clEnv;
1545
1546  assert(image != NULL);
1547  assert(exception != (ExceptionInfo *) NULL);
1548
1549  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1550      (checkHistogramCondition(image,image->intensity) == MagickFalse))
1551    return(MagickFalse);
1552
1553  clEnv=getOpenCLEnvironment(exception);
1554  if (clEnv == (MagickCLEnv) NULL)
1555    return(MagickFalse);
1556
1557  status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1558    exception);
1559  return(status);
1560}
1561
1562/*
1563%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1564%                                                                             %
1565%                                                                             %
1566%                                                                             %
1567%     A c c e l e r a t e C o n v o l v e I m a g e                           %
1568%                                                                             %
1569%                                                                             %
1570%                                                                             %
1571%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1572*/
1573
1574static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1575  const KernelInfo *kernel,ExceptionInfo *exception)
1576{
1577  CacheView
1578    *filteredImage_view,
1579    *image_view;
1580
1581  cl_command_queue
1582    queue;
1583
1584  cl_event
1585    event;
1586
1587  cl_kernel
1588    clkernel;
1589
1590  cl_int
1591    clStatus;
1592
1593  cl_mem
1594    convolutionKernel,
1595    filteredImageBuffer,
1596    imageBuffer;
1597
1598  cl_mem_flags
1599    mem_flags;
1600
1601  const void
1602    *inputPixels;
1603
1604  float
1605    *kernelBufferPtr;
1606
1607  Image
1608    *filteredImage;
1609
1610  MagickBooleanType
1611    outputReady;
1612
1613  MagickCLDevice
1614    device;
1615
1616  MagickSizeType
1617    length;
1618
1619  size_t
1620    global_work_size[3],
1621    localGroupSize[3],
1622    localMemoryRequirement;
1623
1624  unsigned
1625    kernelSize;
1626
1627  unsigned int
1628    filterHeight,
1629    filterWidth,
1630    i,
1631    imageHeight,
1632    imageWidth,
1633    matte;
1634
1635  void
1636    *filteredPixels,
1637    *hostPtr;
1638
1639  /* intialize all CL objects to NULL */
1640  imageBuffer = NULL;
1641  filteredImageBuffer = NULL;
1642  convolutionKernel = NULL;
1643  clkernel = NULL;
1644  queue = NULL;
1645
1646  filteredImage = NULL;
1647  filteredImage_view = NULL;
1648  outputReady = MagickFalse;
1649
1650  device = RequestOpenCLDevice(clEnv);
1651
1652  image_view=AcquireAuthenticCacheView(image,exception);
1653  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1654  if (inputPixels == (const void *) NULL)
1655  {
1656    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1657    goto cleanup;
1658  }
1659
1660  /* Create and initialize OpenCL buffers. */
1661
1662  /* If the host pointer is aligned to the size of CLPixelPacket,
1663     then use the host buffer directly from the GPU; otherwise,
1664     create a buffer on the GPU and copy the data over */
1665  if (ALIGNED(inputPixels,CLPixelPacket))
1666  {
1667    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1668  }
1669  else
1670  {
1671    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1672  }
1673  /* create a CL buffer from image pixel buffer */
1674  length = image->columns * image->rows;
1675  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1676  if (clStatus != CL_SUCCESS)
1677  {
1678    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1679    goto cleanup;
1680  }
1681
1682  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1683  assert(filteredImage != NULL);
1684  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1685  {
1686    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1687    goto cleanup;
1688  }
1689  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1690  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1691  if (filteredPixels == (void *) NULL)
1692  {
1693    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1694    goto cleanup;
1695  }
1696
1697  if (ALIGNED(filteredPixels,CLPixelPacket))
1698  {
1699    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1700    hostPtr = filteredPixels;
1701  }
1702  else
1703  {
1704    mem_flags = CL_MEM_WRITE_ONLY;
1705    hostPtr = NULL;
1706  }
1707  /* create a CL buffer from image pixel buffer */
1708  length = image->columns * image->rows;
1709  filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1710  if (clStatus != CL_SUCCESS)
1711  {
1712    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1713    goto cleanup;
1714  }
1715
1716  kernelSize = (unsigned int) (kernel->width * kernel->height);
1717  convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1718  if (clStatus != CL_SUCCESS)
1719  {
1720    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1721    goto cleanup;
1722  }
1723
1724  queue = AcquireOpenCLCommandQueue(device);
1725
1726  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1727          , 0, NULL, NULL, &clStatus);
1728  if (clStatus != CL_SUCCESS)
1729  {
1730    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1731    goto cleanup;
1732  }
1733  for (i = 0; i < kernelSize; i++)
1734  {
1735    kernelBufferPtr[i] = (float) kernel->values[i];
1736  }
1737  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1738  if (clStatus != CL_SUCCESS)
1739  {
1740    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1741    goto cleanup;
1742  }
1743
1744  /* Compute the local memory requirement for a 16x16 workgroup.
1745     If it's larger than 16k, reduce the workgroup size to 8x8 */
1746  localGroupSize[0] = 16;
1747  localGroupSize[1] = 16;
1748  localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1749    + kernel->width*kernel->height*sizeof(float);
1750
1751  if (localMemoryRequirement > device->local_memory_size)
1752  {
1753    localGroupSize[0] = 8;
1754    localGroupSize[1] = 8;
1755    localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1756      + kernel->width*kernel->height*sizeof(float);
1757  }
1758  if (localMemoryRequirement <= device->local_memory_size)
1759  {
1760    /* get the OpenCL kernel */
1761    clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1762    if (clkernel == NULL)
1763    {
1764      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1765      goto cleanup;
1766    }
1767
1768    /* set the kernel arguments */
1769    i = 0;
1770    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1771    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1772    imageWidth = (unsigned int) image->columns;
1773    imageHeight = (unsigned int) image->rows;
1774    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1775    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1776    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1777    filterWidth = (unsigned int) kernel->width;
1778    filterHeight = (unsigned int) kernel->height;
1779    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1780    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1781    matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1782    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1783    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1784    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1785    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1786    if (clStatus != CL_SUCCESS)
1787    {
1788      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1789      goto cleanup;
1790    }
1791
1792    /* pad the global size to a multiple of the local work size dimension */
1793    global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1794    global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1795
1796    /* launch the kernel */
1797    clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1798    if (clStatus != CL_SUCCESS)
1799    {
1800      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1801      goto cleanup;
1802    }
1803    RecordProfileData(device,clkernel,event);
1804  }
1805  else
1806  {
1807    /* get the OpenCL kernel */
1808    clkernel = AcquireOpenCLKernel(device,"Convolve");
1809    if (clkernel == NULL)
1810    {
1811      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1812      goto cleanup;
1813    }
1814
1815    /* set the kernel arguments */
1816    i = 0;
1817    clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1818    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1819    imageWidth = (unsigned int) image->columns;
1820    imageHeight = (unsigned int) image->rows;
1821    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1822    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1823    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1824    filterWidth = (unsigned int) kernel->width;
1825    filterHeight = (unsigned int) kernel->height;
1826    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1827    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1828    matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1829    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1830    clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1831    if (clStatus != CL_SUCCESS)
1832    {
1833      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1834      goto cleanup;
1835    }
1836
1837    localGroupSize[0] = 8;
1838    localGroupSize[1] = 8;
1839    global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1840    global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1841	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1842
1843    if (clStatus != CL_SUCCESS)
1844    {
1845      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1846      goto cleanup;
1847    }
1848  }
1849  RecordProfileData(device,clkernel,event);
1850
1851  if (ALIGNED(filteredPixels,CLPixelPacket))
1852  {
1853    length = image->columns * image->rows;
1854    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1855  }
1856  else
1857  {
1858    length = image->columns * image->rows;
1859    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1860  }
1861  if (clStatus != CL_SUCCESS)
1862  {
1863    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1864    goto cleanup;
1865  }
1866
1867  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1868
1869cleanup:
1870
1871  image_view=DestroyCacheView(image_view);
1872  if (filteredImage_view != NULL)
1873    filteredImage_view=DestroyCacheView(filteredImage_view);
1874  if (imageBuffer != NULL)
1875    clEnv->library->clReleaseMemObject(imageBuffer);
1876  if (filteredImageBuffer != NULL)
1877    clEnv->library->clReleaseMemObject(filteredImageBuffer);
1878  if (convolutionKernel != NULL)
1879    clEnv->library->clReleaseMemObject(convolutionKernel);
1880  if (clkernel != NULL)
1881    ReleaseOpenCLKernel(clkernel);
1882  if (queue != NULL)
1883    ReleaseOpenCLCommandQueue(device,queue);
1884  if (device != NULL)
1885    ReleaseOpenCLDevice(device);
1886  if (outputReady == MagickFalse)
1887  {
1888    if (filteredImage != NULL)
1889    {
1890      DestroyImage(filteredImage);
1891      filteredImage = NULL;
1892    }
1893  }
1894
1895  return(filteredImage);
1896}
1897
1898MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1899  const KernelInfo *kernel,ExceptionInfo *exception)
1900{
1901  /* Temporary disabled due to access violation
1902
1903  Image
1904    *filteredImage;
1905
1906  assert(image != NULL);
1907  assert(kernel != (KernelInfo *) NULL);
1908  assert(exception != (ExceptionInfo *) NULL);
1909  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1910      (checkOpenCLEnvironment(exception) == MagickFalse))
1911    return((Image *) NULL);
1912
1913  filteredImage=ComputeConvolveImage(image,kernel,exception);
1914  return(filteredImage);
1915  */
1916  magick_unreferenced(image);
1917  magick_unreferenced(kernel);
1918  magick_unreferenced(exception);
1919  return((Image *)NULL);
1920}
1921
1922/*
1923%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1924%                                                                             %
1925%                                                                             %
1926%                                                                             %
1927%     A c c e l e r a t e D e s p e c k l e I m a g e                         %
1928%                                                                             %
1929%                                                                             %
1930%                                                                             %
1931%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1932*/
1933
1934static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1935  ExceptionInfo*exception)
1936{
1937  static const int
1938    X[4] = {0, 1, 1,-1},
1939    Y[4] = {1, 0, 1, 1};
1940
1941  CacheView
1942    *filteredImage_view,
1943    *image_view;
1944
1945  cl_command_queue
1946    queue;
1947
1948  cl_int
1949    clStatus;
1950
1951  cl_kernel
1952    hullPass1,
1953    hullPass2;
1954
1955  cl_event
1956    event;
1957
1958  cl_mem_flags
1959    mem_flags;
1960
1961  cl_mem
1962    filteredImageBuffer,
1963    imageBuffer,
1964    tempImageBuffer[2];
1965
1966  const void
1967    *inputPixels;
1968
1969  Image
1970    *filteredImage;
1971
1972  int
1973    k,
1974    matte;
1975
1976  MagickBooleanType
1977    outputReady;
1978
1979  MagickCLDevice
1980    device;
1981
1982  MagickSizeType
1983    length;
1984
1985  size_t
1986    global_work_size[2];
1987
1988  unsigned int
1989    imageHeight,
1990    imageWidth;
1991
1992  void
1993    *filteredPixels,
1994    *hostPtr;
1995
1996  outputReady = MagickFalse;
1997  inputPixels = NULL;
1998  filteredImage = NULL;
1999  filteredImage_view = NULL;
2000  filteredPixels = NULL;
2001  imageBuffer = NULL;
2002  filteredImageBuffer = NULL;
2003  hullPass1 = NULL;
2004  hullPass2 = NULL;
2005  queue = NULL;
2006  tempImageBuffer[0] = tempImageBuffer[1] = NULL;
2007
2008  device = RequestOpenCLDevice(clEnv);
2009  queue = AcquireOpenCLCommandQueue(device);
2010
2011  image_view=AcquireAuthenticCacheView(image,exception);
2012  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2013  if (inputPixels == (void *) NULL)
2014  {
2015    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2016    goto cleanup;
2017  }
2018
2019  if (ALIGNED(inputPixels,CLPixelPacket))
2020  {
2021    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2022  }
2023  else
2024  {
2025    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2026  }
2027  /* create a CL buffer from image pixel buffer */
2028  length = image->columns * image->rows;
2029  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2030  if (clStatus != CL_SUCCESS)
2031  {
2032    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2033    goto cleanup;
2034  }
2035
2036  mem_flags = CL_MEM_READ_WRITE;
2037  length = image->columns * image->rows;
2038  for (k = 0; k < 2; k++)
2039  {
2040    tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
2041    if (clStatus != CL_SUCCESS)
2042    {
2043      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2044      goto cleanup;
2045    }
2046  }
2047
2048  filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2049  assert(filteredImage != NULL);
2050  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2051  {
2052    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
2053    goto cleanup;
2054  }
2055  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2056  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2057  if (filteredPixels == (void *) NULL)
2058  {
2059    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2060    goto cleanup;
2061  }
2062
2063  if (ALIGNED(filteredPixels,CLPixelPacket))
2064  {
2065    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2066    hostPtr = filteredPixels;
2067  }
2068  else
2069  {
2070    mem_flags = CL_MEM_WRITE_ONLY;
2071    hostPtr = NULL;
2072  }
2073  /* create a CL buffer from image pixel buffer */
2074  length = image->columns * image->rows;
2075  filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2076  if (clStatus != CL_SUCCESS)
2077  {
2078    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2079    goto cleanup;
2080  }
2081
2082  hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2083  hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2084
2085  clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2086  clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2087  imageWidth = (unsigned int) image->columns;
2088  clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2089  imageHeight = (unsigned int) image->rows;
2090  clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2091  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2092  clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2093  if (clStatus != CL_SUCCESS)
2094  {
2095    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2096    goto cleanup;
2097  }
2098
2099  clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2100  clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2101  imageWidth = (unsigned int) image->columns;
2102  clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2103  imageHeight = (unsigned int) image->rows;
2104  clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2105  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2106  clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2107  if (clStatus != CL_SUCCESS)
2108  {
2109    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2110    goto cleanup;
2111  }
2112
2113
2114  global_work_size[0] = image->columns;
2115  global_work_size[1] = image->rows;
2116
2117
2118  for (k = 0; k < 4; k++)
2119  {
2120    cl_int2 offset;
2121    int polarity;
2122
2123
2124    offset.s[0] = X[k];
2125    offset.s[1] = Y[k];
2126    polarity = 1;
2127    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2128    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2129    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2130    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2131    if (clStatus != CL_SUCCESS)
2132    {
2133      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2134      goto cleanup;
2135    }
2136    /* launch the kernel */
2137	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2138    if (clStatus != CL_SUCCESS)
2139    {
2140      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2141      goto cleanup;
2142    }
2143    RecordProfileData(device,hullPass1,event);
2144
2145    /* launch the kernel */
2146	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2147    if (clStatus != CL_SUCCESS)
2148    {
2149      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2150      goto cleanup;
2151    }
2152    RecordProfileData(device,hullPass2,event);
2153
2154    if (k == 0)
2155      clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2156    offset.s[0] = -X[k];
2157    offset.s[1] = -Y[k];
2158    polarity = 1;
2159    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2160    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2161    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2162    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2163    if (clStatus != CL_SUCCESS)
2164    {
2165      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2166      goto cleanup;
2167    }
2168    /* launch the kernel */
2169	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2170    if (clStatus != CL_SUCCESS)
2171    {
2172      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2173      goto cleanup;
2174    }
2175    RecordProfileData(device,hullPass1,event);
2176
2177    /* launch the kernel */
2178	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2179    if (clStatus != CL_SUCCESS)
2180    {
2181      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2182      goto cleanup;
2183    }
2184    RecordProfileData(device,hullPass2,event);
2185
2186    offset.s[0] = -X[k];
2187    offset.s[1] = -Y[k];
2188    polarity = -1;
2189    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2190    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2191    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2192    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2193    if (clStatus != CL_SUCCESS)
2194    {
2195      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2196      goto cleanup;
2197    }
2198    /* launch the kernel */
2199	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2200    if (clStatus != CL_SUCCESS)
2201    {
2202      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2203      goto cleanup;
2204    }
2205    RecordProfileData(device,hullPass1,event);
2206
2207    /* launch the kernel */
2208	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2209    if (clStatus != CL_SUCCESS)
2210    {
2211      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2212      goto cleanup;
2213    }
2214    RecordProfileData(device,hullPass2,event);
2215
2216    offset.s[0] = X[k];
2217    offset.s[1] = Y[k];
2218    polarity = -1;
2219    clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2220    clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2221    clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2222    clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2223
2224    if (k == 3)
2225      clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2226
2227    if (clStatus != CL_SUCCESS)
2228    {
2229      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2230      goto cleanup;
2231    }
2232    /* launch the kernel */
2233	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2234    if (clStatus != CL_SUCCESS)
2235    {
2236      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2237      goto cleanup;
2238    }
2239    RecordProfileData(device,hullPass1,event);
2240
2241    /* launch the kernel */
2242	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2243    if (clStatus != CL_SUCCESS)
2244    {
2245      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2246      goto cleanup;
2247    }
2248    RecordProfileData(device,hullPass2,event);
2249  }
2250
2251  if (ALIGNED(filteredPixels,CLPixelPacket))
2252  {
2253    length = image->columns * image->rows;
2254    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2255  }
2256  else
2257  {
2258    length = image->columns * image->rows;
2259    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2260  }
2261  if (clStatus != CL_SUCCESS)
2262  {
2263    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2264    goto cleanup;
2265  }
2266
2267  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2268
2269cleanup:
2270
2271  image_view=DestroyCacheView(image_view);
2272  if (filteredImage_view != NULL)
2273    filteredImage_view=DestroyCacheView(filteredImage_view);
2274
2275  if (queue != NULL)
2276    ReleaseOpenCLCommandQueue(device,queue);
2277  if (device != NULL)
2278    ReleaseOpenCLDevice(device);
2279  if (imageBuffer!=NULL)
2280    clEnv->library->clReleaseMemObject(imageBuffer);
2281  for (k = 0; k < 2; k++)
2282  {
2283    if (tempImageBuffer[k]!=NULL)
2284      clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2285  }
2286  if (filteredImageBuffer!=NULL)
2287    clEnv->library->clReleaseMemObject(filteredImageBuffer);
2288  if (hullPass1!=NULL)
2289    ReleaseOpenCLKernel(hullPass1);
2290  if (hullPass2!=NULL)
2291    ReleaseOpenCLKernel(hullPass2);
2292  if (outputReady == MagickFalse && filteredImage != NULL)
2293    filteredImage=DestroyImage(filteredImage);
2294
2295  return(filteredImage);
2296}
2297
2298MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2299  ExceptionInfo* exception)
2300{
2301  Image
2302    *filteredImage;
2303
2304  MagickCLEnv
2305    clEnv;
2306
2307  assert(image != NULL);
2308  assert(exception != (ExceptionInfo *) NULL);
2309
2310  if (checkAccelerateConditionRGBA(image) == MagickFalse)
2311    return((Image *) NULL);
2312
2313  clEnv=getOpenCLEnvironment(exception);
2314  if (clEnv == (MagickCLEnv) NULL)
2315    return((Image *) NULL);
2316
2317  filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2318  return(filteredImage);
2319}
2320
2321/*
2322%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2323%                                                                             %
2324%                                                                             %
2325%                                                                             %
2326%     A c c e l e r a t e E q u a l i z e I m a g e                           %
2327%                                                                             %
2328%                                                                             %
2329%                                                                             %
2330%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2331*/
2332
2333static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2334  ExceptionInfo *exception)
2335{
2336#define EqualizeImageTag  "Equalize/Image"
2337
2338  CacheView
2339    *image_view;
2340
2341  cl_command_queue
2342    queue;
2343
2344  cl_int
2345    clStatus;
2346
2347  cl_mem_flags
2348    mem_flags;
2349
2350  cl_mem
2351    equalizeMapBuffer,
2352    histogramBuffer,
2353    imageBuffer;
2354
2355  cl_kernel
2356    equalizeKernel,
2357    histogramKernel;
2358
2359  cl_event
2360    event;
2361
2362  cl_uint4
2363    *histogram;
2364
2365  FloatPixelPacket
2366    white,
2367    black,
2368    intensity,
2369    *map;
2370
2371  MagickBooleanType
2372    outputReady,
2373    status;
2374
2375  MagickCLDevice
2376    device;
2377
2378  MagickSizeType
2379    length;
2380
2381  PixelPacket
2382    *equalize_map;
2383
2384  register ssize_t
2385    i;
2386
2387  size_t
2388    global_work_size[2];
2389
2390  void
2391    *hostPtr,
2392    *inputPixels;
2393
2394  map=NULL;
2395  histogram=NULL;
2396  equalize_map=NULL;
2397  inputPixels = NULL;
2398  imageBuffer = NULL;
2399  histogramBuffer = NULL;
2400  equalizeMapBuffer = NULL;
2401  histogramKernel = NULL;
2402  equalizeKernel = NULL;
2403  queue = NULL;
2404  outputReady = MagickFalse;
2405
2406  assert(image != (Image *) NULL);
2407  assert(image->signature == MagickCoreSignature);
2408  if (image->debug != MagickFalse)
2409    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2410
2411  /*
2412   * initialize opencl env
2413   */
2414  device = RequestOpenCLDevice(clEnv);
2415  queue = AcquireOpenCLCommandQueue(device);
2416
2417  /*
2418    Allocate and initialize histogram arrays.
2419  */
2420  histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2421  if (histogram == (cl_uint4 *) NULL)
2422      ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2423
2424  /* reset histogram */
2425  (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
2426
2427  /* Create and initialize OpenCL buffers. */
2428  /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2429  /* assume this  will get a writable image */
2430  image_view=AcquireAuthenticCacheView(image,exception);
2431  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2432
2433  if (inputPixels == (void *) NULL)
2434  {
2435    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2436    goto cleanup;
2437  }
2438  /* If the host pointer is aligned to the size of CLPixelPacket,
2439     then use the host buffer directly from the GPU; otherwise,
2440     create a buffer on the GPU and copy the data over */
2441  if (ALIGNED(inputPixels,CLPixelPacket))
2442  {
2443    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2444  }
2445  else
2446  {
2447    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2448  }
2449  /* create a CL buffer from image pixel buffer */
2450  length = image->columns * image->rows;
2451  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2452  if (clStatus != CL_SUCCESS)
2453  {
2454    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2455    goto cleanup;
2456  }
2457
2458  /* If the host pointer is aligned to the size of cl_uint,
2459     then use the host buffer directly from the GPU; otherwise,
2460     create a buffer on the GPU and copy the data over */
2461  if (ALIGNED(histogram,cl_uint4))
2462  {
2463    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2464    hostPtr = histogram;
2465  }
2466  else
2467  {
2468    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2469    hostPtr = histogram;
2470  }
2471  /* create a CL buffer for histogram  */
2472  length = (MaxMap+1);
2473  histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2474  if (clStatus != CL_SUCCESS)
2475  {
2476    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2477    goto cleanup;
2478  }
2479
2480  status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2481  if (status == MagickFalse)
2482    goto cleanup;
2483
2484  /* read from the kenel output */
2485  if (ALIGNED(histogram,cl_uint4))
2486  {
2487    length = (MaxMap+1);
2488    clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2489  }
2490  else
2491  {
2492    length = (MaxMap+1);
2493    clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2494  }
2495  if (clStatus != CL_SUCCESS)
2496  {
2497    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2498    goto cleanup;
2499  }
2500
2501  /* unmap, don't block gpu to use this buffer again.  */
2502  if (ALIGNED(histogram,cl_uint4))
2503  {
2504    clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2505    if (clStatus != CL_SUCCESS)
2506    {
2507      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2508      goto cleanup;
2509    }
2510  }
2511
2512  /* recreate input buffer later, in case image updated */
2513#ifdef RECREATEBUFFER
2514  if (imageBuffer!=NULL)
2515    clEnv->library->clReleaseMemObject(imageBuffer);
2516#endif
2517
2518  /* CPU stuff */
2519  equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2520  if (equalize_map == (PixelPacket *) NULL)
2521    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2522
2523  map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2524  if (map == (FloatPixelPacket *) NULL)
2525    ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2526
2527  /*
2528    Integrate the histogram to get the equalization map.
2529  */
2530  (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
2531  for (i=0; i <= (ssize_t) MaxMap; i++)
2532  {
2533    if ((image->channel_mask & SyncChannels) != 0)
2534    {
2535      intensity.red+=histogram[i].s[2];
2536      map[i]=intensity;
2537      continue;
2538    }
2539    if ((image->channel_mask & RedChannel) != 0)
2540      intensity.red+=histogram[i].s[2];
2541    if ((image->channel_mask & GreenChannel) != 0)
2542      intensity.green+=histogram[i].s[1];
2543    if ((image->channel_mask & BlueChannel) != 0)
2544      intensity.blue+=histogram[i].s[0];
2545    if ((image->channel_mask & AlphaChannel) != 0)
2546      intensity.alpha+=histogram[i].s[3];
2547    /*
2548    if (((channel & IndexChannel) != 0) &&
2549        (image->colorspace == CMYKColorspace))
2550    {
2551      intensity.index+=histogram[i].index;
2552    }
2553    */
2554    map[i]=intensity;
2555  }
2556  black=map[0];
2557  white=map[(int) MaxMap];
2558  (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2559  for (i=0; i <= (ssize_t) MaxMap; i++)
2560  {
2561    if ((image->channel_mask & SyncChannels) != 0)
2562    {
2563      if (white.red != black.red)
2564        equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2565                (map[i].red-black.red))/(white.red-black.red)));
2566      continue;
2567    }
2568    if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2569      equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2570              (map[i].red-black.red))/(white.red-black.red)));
2571    if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2572      equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2573              (map[i].green-black.green))/(white.green-black.green)));
2574    if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2575      equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2576              (map[i].blue-black.blue))/(white.blue-black.blue)));
2577    if (((image->channel_mask & AlphaChannel) != 0) && (white.alpha != black.alpha))
2578      equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2579              (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
2580    /*
2581    if ((((channel & IndexChannel) != 0) &&
2582          (image->colorspace == CMYKColorspace)) &&
2583        (white.index != black.index))
2584      equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2585              (map[i].index-black.index))/(white.index-black.index)));
2586    */
2587  }
2588
2589  if (image->storage_class == PseudoClass)
2590  {
2591    /*
2592       Equalize colormap.
2593       */
2594    for (i=0; i < (ssize_t) image->colors; i++)
2595    {
2596      if ((image->channel_mask & SyncChannels) != 0)
2597      {
2598        if (white.red != black.red)
2599        {
2600          image->colormap[i].red=equalize_map[
2601            ScaleQuantumToMap(image->colormap[i].red)].red;
2602          image->colormap[i].green=equalize_map[
2603            ScaleQuantumToMap(image->colormap[i].green)].red;
2604          image->colormap[i].blue=equalize_map[
2605            ScaleQuantumToMap(image->colormap[i].blue)].red;
2606          image->colormap[i].alpha=equalize_map[
2607            ScaleQuantumToMap(image->colormap[i].alpha)].red;
2608        }
2609        continue;
2610      }
2611      if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2612        image->colormap[i].red=equalize_map[
2613          ScaleQuantumToMap(image->colormap[i].red)].red;
2614      if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2615        image->colormap[i].green=equalize_map[
2616          ScaleQuantumToMap(image->colormap[i].green)].green;
2617      if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2618        image->colormap[i].blue=equalize_map[
2619          ScaleQuantumToMap(image->colormap[i].blue)].blue;
2620      if (((image->channel_mask & AlphaChannel) != 0) &&
2621          (white.alpha != black.alpha))
2622        image->colormap[i].alpha=equalize_map[
2623          ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2624    }
2625  }
2626
2627  /*
2628    Equalize image.
2629  */
2630
2631  /* GPU can work on this again, image and equalize map as input
2632    image:        uchar4 (CLPixelPacket)
2633    equalize_map: uchar4 (PixelPacket)
2634    black, white: float4 (FloatPixelPacket) */
2635
2636#ifdef RECREATEBUFFER
2637  /* If the host pointer is aligned to the size of CLPixelPacket,
2638     then use the host buffer directly from the GPU; otherwise,
2639     create a buffer on the GPU and copy the data over */
2640  if (ALIGNED(inputPixels,CLPixelPacket))
2641  {
2642    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2643  }
2644  else
2645  {
2646    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2647  }
2648  /* create a CL buffer from image pixel buffer */
2649  length = image->columns * image->rows;
2650  imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2651  if (clStatus != CL_SUCCESS)
2652  {
2653    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2654    goto cleanup;
2655  }
2656#endif
2657
2658  /* Create and initialize OpenCL buffers. */
2659  if (ALIGNED(equalize_map, PixelPacket))
2660  {
2661    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2662    hostPtr = equalize_map;
2663  }
2664  else
2665  {
2666    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2667    hostPtr = equalize_map;
2668  }
2669  /* create a CL buffer for eqaulize_map  */
2670  length = (MaxMap+1);
2671  equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2672  if (clStatus != CL_SUCCESS)
2673  {
2674    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2675    goto cleanup;
2676  }
2677
2678  /* get the OpenCL kernel */
2679  equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2680  if (equalizeKernel == NULL)
2681  {
2682    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2683    goto cleanup;
2684  }
2685
2686  /* set the kernel arguments */
2687  i = 0;
2688  clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2689  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2690  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2691  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
2692  clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
2693  if (clStatus != CL_SUCCESS)
2694  {
2695    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2696    goto cleanup;
2697  }
2698
2699  /* launch the kernel */
2700  global_work_size[0] = image->columns;
2701  global_work_size[1] = image->rows;
2702
2703  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2704
2705  if (clStatus != CL_SUCCESS)
2706  {
2707    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2708    goto cleanup;
2709  }
2710  RecordProfileData(device,equalizeKernel,event);
2711
2712  /* read the data back */
2713  if (ALIGNED(inputPixels,CLPixelPacket))
2714  {
2715    length = image->columns * image->rows;
2716    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2717  }
2718  else
2719  {
2720    length = image->columns * image->rows;
2721    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2722  }
2723  if (clStatus != CL_SUCCESS)
2724  {
2725    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2726    goto cleanup;
2727  }
2728
2729  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2730
2731cleanup:
2732
2733  image_view=DestroyCacheView(image_view);
2734
2735  if (imageBuffer!=NULL)
2736    clEnv->library->clReleaseMemObject(imageBuffer);
2737  if (map!=NULL)
2738    map=(FloatPixelPacket *) RelinquishMagickMemory(map);
2739  if (equalizeMapBuffer!=NULL)
2740    clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2741  if (equalize_map!=NULL)
2742    equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2743  if (histogramBuffer!=NULL)
2744    clEnv->library->clReleaseMemObject(histogramBuffer);
2745  if (histogram!=NULL)
2746    histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2747  if (histogramKernel!=NULL)
2748    ReleaseOpenCLKernel(histogramKernel);
2749  if (equalizeKernel!=NULL)
2750    ReleaseOpenCLKernel(equalizeKernel);
2751  if (queue != NULL)
2752    ReleaseOpenCLCommandQueue(device, queue);
2753  if (device != NULL)
2754    ReleaseOpenCLDevice(device);
2755
2756  return(outputReady);
2757}
2758
2759MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2760  ExceptionInfo *exception)
2761{
2762  MagickBooleanType
2763    status;
2764
2765  MagickCLEnv
2766    clEnv;
2767
2768  assert(image != NULL);
2769  assert(exception != (ExceptionInfo *) NULL);
2770
2771  if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2772      (checkHistogramCondition(image,image->intensity) == MagickFalse))
2773    return(MagickFalse);
2774
2775  clEnv=getOpenCLEnvironment(exception);
2776  if (clEnv == (MagickCLEnv) NULL)
2777    return(MagickFalse);
2778
2779  status=ComputeEqualizeImage(image,clEnv,exception);
2780  return(status);
2781}
2782
2783/*
2784%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2785%                                                                             %
2786%                                                                             %
2787%                                                                             %
2788%     A c c e l e r a t e F u n c t i o n I m a g e                           %
2789%                                                                             %
2790%                                                                             %
2791%                                                                             %
2792%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2793*/
2794
2795static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2796  const MagickFunction function,const size_t number_parameters,
2797  const double *parameters,ExceptionInfo *exception)
2798{
2799  cl_command_queue
2800    queue;
2801
2802  cl_int
2803    status;
2804
2805  cl_kernel
2806    functionKernel;
2807
2808  cl_mem
2809    imageBuffer,
2810    parametersBuffer;
2811
2812  cl_uint
2813    number_params,
2814    number_channels;
2815
2816  float
2817    *parametersBufferPtr;
2818
2819  MagickBooleanType
2820    outputReady;
2821
2822  MagickCLDevice
2823    device;
2824
2825  size_t
2826    gsize[2],
2827    i;
2828
2829  outputReady=MagickFalse;
2830
2831  functionKernel=NULL;
2832  parametersBuffer=NULL;
2833
2834  device=RequestOpenCLDevice(clEnv);
2835  queue=AcquireOpenCLCommandQueue(device);
2836  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2837  if (imageBuffer == (cl_mem) NULL)
2838    goto cleanup;
2839
2840  parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2841    sizeof(float));
2842  if (parametersBufferPtr == (float *) NULL)
2843    goto cleanup;
2844  for (i=0; i<number_parameters; i++)
2845    parametersBufferPtr[i]=(float) parameters[i];
2846  parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2847    CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2848    parametersBufferPtr);
2849  parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2850  if (parametersBuffer == (cl_mem) NULL)
2851  {
2852    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2853      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2854    goto cleanup;
2855  }
2856
2857  functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2858  if (functionKernel == (cl_kernel) NULL)
2859  {
2860    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2861      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2862    goto cleanup;
2863  }
2864
2865  number_channels=(cl_uint) image->number_channels;
2866  number_params=(cl_uint) number_parameters;
2867
2868  i=0;
2869  status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2870  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2871  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2872  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2873  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2874  status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
2875  if (status != CL_SUCCESS)
2876  {
2877    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2878      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2879    goto cleanup;
2880  }
2881
2882  gsize[0]=image->columns;
2883  gsize[1]=image->rows;
2884  outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2885    gsize,(const size_t *) NULL,image,(const Image *) NULL,exception);
2886
2887cleanup:
2888
2889  if (parametersBuffer != (cl_mem) NULL)
2890    ReleaseOpenCLMemObject(parametersBuffer);
2891  if (functionKernel != (cl_kernel) NULL)
2892    ReleaseOpenCLKernel(functionKernel);
2893  if (queue != (cl_command_queue) NULL)
2894    ReleaseOpenCLCommandQueue(device,queue);
2895  if (device != (MagickCLDevice) NULL)
2896    ReleaseOpenCLDevice(device);
2897  return(outputReady);
2898}
2899
2900MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2901  const MagickFunction function,const size_t number_parameters,
2902  const double *parameters,ExceptionInfo *exception)
2903{
2904  MagickBooleanType
2905    status;
2906
2907  MagickCLEnv
2908    clEnv;
2909
2910  assert(image != NULL);
2911  assert(exception != (ExceptionInfo *) NULL);
2912
2913  if (checkAccelerateCondition(image) == MagickFalse)
2914    return(MagickFalse);
2915
2916  clEnv=getOpenCLEnvironment(exception);
2917  if (clEnv == (MagickCLEnv) NULL)
2918    return(MagickFalse);
2919
2920  status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2921    parameters,exception);
2922  return(status);
2923}
2924
2925/*
2926%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2927%                                                                             %
2928%                                                                             %
2929%                                                                             %
2930%     A c c e l e r a t e G r a y s c a l e I m a g e                         %
2931%                                                                             %
2932%                                                                             %
2933%                                                                             %
2934%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2935*/
2936
2937static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2938  const PixelIntensityMethod method,ExceptionInfo *exception)
2939{
2940  cl_command_queue
2941    queue;
2942
2943  cl_int
2944    status;
2945
2946  cl_kernel
2947    grayscaleKernel;
2948
2949  cl_mem
2950    imageBuffer;
2951
2952  cl_uint
2953    number_channels,
2954    colorspace,
2955    intensityMethod;
2956
2957  MagickBooleanType
2958    outputReady;
2959
2960  MagickCLDevice
2961    device;
2962
2963  size_t
2964    gsize[2],
2965    i;
2966
2967  outputReady=MagickFalse;
2968  grayscaleKernel=NULL;
2969
2970  assert(image != (Image *) NULL);
2971  assert(image->signature == MagickCoreSignature);
2972  device=RequestOpenCLDevice(clEnv);
2973  queue=AcquireOpenCLCommandQueue(device);
2974  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2975  if (imageBuffer == (cl_mem) NULL)
2976    goto cleanup;
2977
2978  grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2979  if (grayscaleKernel == (cl_kernel) NULL)
2980  {
2981    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2982      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2983    goto cleanup;
2984  }
2985
2986  number_channels=(cl_uint) image->number_channels;
2987  intensityMethod=(cl_uint) method;
2988  colorspace=(cl_uint) image->colorspace;
2989
2990  i=0;
2991  status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2992  status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2993  status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2994  status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2995  if (status != CL_SUCCESS)
2996  {
2997    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2998      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2999    goto cleanup;
3000  }
3001
3002  gsize[0]=image->columns;
3003  gsize[1]=image->rows;
3004  outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
3005    (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
3006    exception);
3007
3008cleanup:
3009
3010  if (grayscaleKernel != (cl_kernel) NULL)
3011    ReleaseOpenCLKernel(grayscaleKernel);
3012  if (queue != (cl_command_queue) NULL)
3013    ReleaseOpenCLCommandQueue(device,queue);
3014  if (device != (MagickCLDevice) NULL)
3015    ReleaseOpenCLDevice(device);
3016
3017  return(outputReady);
3018}
3019
3020MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
3021  const PixelIntensityMethod method,ExceptionInfo *exception)
3022{
3023  MagickBooleanType
3024    status;
3025
3026  MagickCLEnv
3027    clEnv;
3028
3029  assert(image != NULL);
3030  assert(exception != (ExceptionInfo *) NULL);
3031
3032  if ((checkAccelerateCondition(image) == MagickFalse) ||
3033      (checkPixelIntensity(image,method) == MagickFalse))
3034    return(MagickFalse);
3035
3036  if (image->number_channels < 3)
3037    return(MagickFalse);
3038
3039  if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
3040      (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
3041      (GetPixelBlueTraits(image) == UndefinedPixelTrait))
3042    return(MagickFalse);
3043
3044  clEnv=getOpenCLEnvironment(exception);
3045  if (clEnv == (MagickCLEnv) NULL)
3046    return(MagickFalse);
3047
3048  status=ComputeGrayscaleImage(image,clEnv,method,exception);
3049  return(status);
3050}
3051
3052/*
3053%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3054%                                                                             %
3055%                                                                             %
3056%                                                                             %
3057%     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
3058%                                                                             %
3059%                                                                             %
3060%                                                                             %
3061%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3062*/
3063
3064static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
3065  const double radius,const double strength,ExceptionInfo *exception)
3066{
3067  CacheView
3068    *filteredImage_view,
3069    *image_view;
3070
3071  cl_command_queue
3072    queue;
3073
3074  cl_int
3075    clStatus,
3076    iRadius;
3077
3078  cl_kernel
3079    blurRowKernel,
3080    blurColumnKernel;
3081
3082  cl_event
3083    event;
3084
3085  cl_mem
3086    filteredImageBuffer,
3087    imageBuffer,
3088    imageKernelBuffer,
3089    tempImageBuffer;
3090
3091  cl_mem_flags
3092    mem_flags;
3093
3094  const void
3095    *inputPixels;
3096
3097  Image
3098    *filteredImage;
3099
3100  MagickBooleanType
3101    outputReady;
3102
3103  MagickCLDevice
3104    device;
3105
3106  MagickSizeType
3107    length;
3108
3109  void
3110    *filteredPixels,
3111    *hostPtr;
3112
3113  unsigned int
3114    i,
3115    imageColumns,
3116    imageRows,
3117    passes;
3118
3119  filteredImage = NULL;
3120  filteredImage_view = NULL;
3121  imageBuffer = NULL;
3122  filteredImageBuffer = NULL;
3123  tempImageBuffer = NULL;
3124  imageKernelBuffer = NULL;
3125  blurRowKernel = NULL;
3126  blurColumnKernel = NULL;
3127  queue = NULL;
3128  outputReady = MagickFalse;
3129
3130  device = RequestOpenCLDevice(clEnv);
3131  queue = AcquireOpenCLCommandQueue(device);
3132
3133  /* Create and initialize OpenCL buffers. */
3134  {
3135    image_view=AcquireAuthenticCacheView(image,exception);
3136    inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3137    if (inputPixels == (const void *) NULL)
3138    {
3139      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3140      goto cleanup;
3141    }
3142
3143    /* If the host pointer is aligned to the size of CLPixelPacket,
3144     then use the host buffer directly from the GPU; otherwise,
3145     create a buffer on the GPU and copy the data over */
3146    if (ALIGNED(inputPixels,CLPixelPacket))
3147    {
3148      mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3149    }
3150    else
3151    {
3152      mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3153    }
3154    /* create a CL buffer from image pixel buffer */
3155    length = image->columns * image->rows;
3156    imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3157    if (clStatus != CL_SUCCESS)
3158    {
3159      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3160      goto cleanup;
3161    }
3162  }
3163
3164  /* create output */
3165  {
3166    filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
3167    assert(filteredImage != NULL);
3168    if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3169    {
3170      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3171      goto cleanup;
3172    }
3173    filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3174    filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3175    if (filteredPixels == (void *) NULL)
3176    {
3177      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3178      goto cleanup;
3179    }
3180
3181    if (ALIGNED(filteredPixels,CLPixelPacket))
3182    {
3183      mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3184      hostPtr = filteredPixels;
3185    }
3186    else
3187    {
3188      mem_flags = CL_MEM_WRITE_ONLY;
3189      hostPtr = NULL;
3190    }
3191
3192    /* create a CL buffer from image pixel buffer */
3193    length = image->columns * image->rows;
3194    filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3195    if (clStatus != CL_SUCCESS)
3196    {
3197      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3198      goto cleanup;
3199    }
3200  }
3201
3202  {
3203    /* create temp buffer */
3204    {
3205      length = image->columns * image->rows;
3206      tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3207      if (clStatus != CL_SUCCESS)
3208      {
3209        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3210        goto cleanup;
3211      }
3212    }
3213
3214    /* get the opencl kernel */
3215    {
3216      blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3217      if (blurRowKernel == NULL)
3218      {
3219        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3220        goto cleanup;
3221      };
3222
3223      blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3224      if (blurColumnKernel == NULL)
3225      {
3226        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3227        goto cleanup;
3228      };
3229    }
3230
3231    {
3232      imageColumns = (unsigned int) image->columns;
3233      imageRows = (unsigned int) image->rows;
3234      iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); // Normalized radius, 100% gives blur radius of 20% of the largest dimension
3235
3236      passes = (((1.0f * imageColumns) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3237      passes = (passes < 1) ? 1: passes;
3238
3239      /* set the kernel arguments */
3240      i = 0;
3241      clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3242      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3243      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3244      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3245      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3246      clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3247
3248      if (clStatus != CL_SUCCESS)
3249      {
3250        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3251        goto cleanup;
3252      }
3253    }
3254
3255    /* launch the kernel */
3256    {
3257      int x;
3258      for (x = 0; x < passes; ++x) {
3259        size_t gsize[2];
3260        size_t wsize[2];
3261        size_t goffset[2];
3262
3263        gsize[0] = 256;
3264        gsize[1] = image->rows / passes;
3265        wsize[0] = 256;
3266        wsize[1] = 1;
3267        goffset[0] = 0;
3268        goffset[1] = x * gsize[1];
3269
3270        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3271        if (clStatus != CL_SUCCESS)
3272        {
3273          (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3274          goto cleanup;
3275        }
3276        clEnv->library->clFlush(queue);
3277        RecordProfileData(device,blurRowKernel,event);
3278      }
3279    }
3280
3281    {
3282      cl_float FStrength = strength;
3283      i = 0;
3284      clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3285      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3286      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3287      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3288      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3289      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3290      clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3291
3292      if (clStatus != CL_SUCCESS)
3293      {
3294        (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3295        goto cleanup;
3296      }
3297    }
3298
3299    /* launch the kernel */
3300    {
3301      int x;
3302      for (x = 0; x < passes; ++x) {
3303        size_t gsize[2];
3304        size_t wsize[2];
3305        size_t goffset[2];
3306
3307        gsize[0] = ((image->columns + 3) / 4) * 4;
3308        gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3309        wsize[0] = 4;
3310        wsize[1] = 64;
3311        goffset[0] = 0;
3312        goffset[1] = x * gsize[1];
3313
3314        clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3315        if (clStatus != CL_SUCCESS)
3316        {
3317          (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3318          goto cleanup;
3319        }
3320        clEnv->library->clFlush(queue);
3321        RecordProfileData(device,blurColumnKernel,event);
3322      }
3323    }
3324  }
3325
3326  /* get result */
3327  if (ALIGNED(filteredPixels,CLPixelPacket))
3328  {
3329    length = image->columns * image->rows;
3330    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3331  }
3332  else
3333  {
3334    length = image->columns * image->rows;
3335    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3336  }
3337  if (clStatus != CL_SUCCESS)
3338  {
3339    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3340    goto cleanup;
3341  }
3342
3343  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3344
3345cleanup:
3346
3347  image_view=DestroyCacheView(image_view);
3348  if (filteredImage_view != NULL)
3349    filteredImage_view=DestroyCacheView(filteredImage_view);
3350
3351  if (imageBuffer!=NULL)
3352    clEnv->library->clReleaseMemObject(imageBuffer);
3353  if (filteredImageBuffer!=NULL)
3354    clEnv->library->clReleaseMemObject(filteredImageBuffer);
3355  if (tempImageBuffer!=NULL)
3356    clEnv->library->clReleaseMemObject(tempImageBuffer);
3357  if (imageKernelBuffer!=NULL)
3358    clEnv->library->clReleaseMemObject(imageKernelBuffer);
3359  if (blurRowKernel!=NULL)
3360    ReleaseOpenCLKernel(blurRowKernel);
3361  if (blurColumnKernel!=NULL)
3362    ReleaseOpenCLKernel(blurColumnKernel);
3363  if (queue != NULL)
3364    ReleaseOpenCLCommandQueue(device, queue);
3365  if (device != NULL)
3366    ReleaseOpenCLDevice(device);
3367  if (outputReady == MagickFalse)
3368  {
3369    if (filteredImage != NULL)
3370    {
3371      DestroyImage(filteredImage);
3372      filteredImage = NULL;
3373    }
3374  }
3375
3376  return(filteredImage);
3377}
3378
3379MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3380  const double radius,const double strength,ExceptionInfo *exception)
3381{
3382  Image
3383    *filteredImage;
3384
3385  MagickCLEnv
3386    clEnv;
3387
3388  assert(image != NULL);
3389  assert(exception != (ExceptionInfo *) NULL);
3390
3391  if (checkAccelerateConditionRGBA(image) == MagickFalse)
3392    return((Image *) NULL);
3393
3394  clEnv=getOpenCLEnvironment(exception);
3395  if (clEnv == (MagickCLEnv) NULL)
3396    return((Image *) NULL);
3397
3398  filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3399    exception);
3400  return(filteredImage);
3401}
3402
3403/*
3404%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3405%                                                                             %
3406%                                                                             %
3407%                                                                             %
3408%     A c c e l e r a t e M o d u l a t e I m a g e                           %
3409%                                                                             %
3410%                                                                             %
3411%                                                                             %
3412%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3413*/
3414
3415static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3416  const double percent_brightness,const double percent_hue,
3417  const double percent_saturation,const ColorspaceType colorspace,
3418  ExceptionInfo *exception)
3419{
3420  CacheView
3421    *image_view;
3422
3423  cl_float
3424    bright,
3425    hue,
3426    saturation;
3427
3428  cl_command_queue
3429    queue;
3430
3431  cl_int
3432    color,
3433    clStatus;
3434
3435  cl_kernel
3436    modulateKernel;
3437
3438  cl_event
3439    event;
3440
3441  cl_mem
3442    imageBuffer;
3443
3444  cl_mem_flags
3445    mem_flags;
3446
3447  MagickBooleanType
3448    outputReady;
3449
3450  MagickCLDevice
3451    device;
3452
3453  MagickSizeType
3454    length;
3455
3456  register ssize_t
3457    i;
3458
3459  void
3460    *inputPixels;
3461
3462  inputPixels = NULL;
3463  imageBuffer = NULL;
3464  modulateKernel = NULL;
3465
3466  assert(image != (Image *) NULL);
3467  assert(image->signature == MagickCoreSignature);
3468  if (image->debug != MagickFalse)
3469    (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3470
3471  /*
3472   * initialize opencl env
3473   */
3474  device = RequestOpenCLDevice(clEnv);
3475  queue = AcquireOpenCLCommandQueue(device);
3476
3477  outputReady = MagickFalse;
3478
3479  /* Create and initialize OpenCL buffers.
3480   inputPixels = AcquirePixelCachePixels(image, &length, exception);
3481   assume this  will get a writable image
3482   */
3483  image_view=AcquireAuthenticCacheView(image,exception);
3484  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3485  if (inputPixels == (void *) NULL)
3486  {
3487    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3488    goto cleanup;
3489  }
3490
3491  /* If the host pointer is aligned to the size of CLPixelPacket,
3492   then use the host buffer directly from the GPU; otherwise,
3493   create a buffer on the GPU and copy the data over
3494   */
3495  if (ALIGNED(inputPixels,CLPixelPacket))
3496  {
3497    mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3498  }
3499  else
3500  {
3501    mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3502  }
3503  /* create a CL buffer from image pixel buffer */
3504  length = image->columns * image->rows;
3505  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3506  if (clStatus != CL_SUCCESS)
3507  {
3508    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3509    goto cleanup;
3510  }
3511
3512  modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3513  if (modulateKernel == NULL)
3514  {
3515    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3516    goto cleanup;
3517  }
3518
3519  bright=percent_brightness;
3520  hue=percent_hue;
3521  saturation=percent_saturation;
3522  color=colorspace;
3523
3524  i = 0;
3525  clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3526  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3527  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3528  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3529  clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3530  if (clStatus != CL_SUCCESS)
3531  {
3532    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3533    goto cleanup;
3534  }
3535
3536  {
3537    size_t global_work_size[2];
3538    global_work_size[0] = image->columns;
3539    global_work_size[1] = image->rows;
3540    /* launch the kernel */
3541	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3542    if (clStatus != CL_SUCCESS)
3543    {
3544      (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3545      goto cleanup;
3546    }
3547    RecordProfileData(device,modulateKernel,event);
3548  }
3549
3550  if (ALIGNED(inputPixels,CLPixelPacket))
3551  {
3552    length = image->columns * image->rows;
3553    clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3554  }
3555  else
3556  {
3557    length = image->columns * image->rows;
3558    clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3559  }
3560  if (clStatus != CL_SUCCESS)
3561  {
3562    (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3563    goto cleanup;
3564  }
3565
3566  outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3567
3568cleanup:
3569
3570  image_view=DestroyCacheView(image_view);
3571
3572  if (imageBuffer!=NULL)
3573    clEnv->library->clReleaseMemObject(imageBuffer);
3574  if (modulateKernel!=NULL)
3575    ReleaseOpenCLKernel(modulateKernel);
3576  if (queue != NULL)
3577    ReleaseOpenCLCommandQueue(device,queue);
3578  if (device != NULL)
3579    ReleaseOpenCLDevice(device);
3580
3581  return outputReady;
3582
3583}
3584
3585MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3586  const double percent_brightness,const double percent_hue,
3587  const double percent_saturation,const ColorspaceType colorspace,
3588  ExceptionInfo *exception)
3589{
3590  MagickBooleanType
3591    status;
3592
3593  MagickCLEnv
3594    clEnv;
3595
3596  assert(image != NULL);
3597  assert(exception != (ExceptionInfo *) NULL);
3598
3599  if (checkAccelerateConditionRGBA(image) == MagickFalse)
3600    return(MagickFalse);
3601
3602  if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3603    return(MagickFalse);
3604
3605  clEnv=getOpenCLEnvironment(exception);
3606  if (clEnv == (MagickCLEnv) NULL)
3607    return(MagickFalse);
3608
3609  status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3610    percent_saturation,colorspace,exception);
3611  return(status);
3612}
3613
3614/*
3615%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3616%                                                                             %
3617%                                                                             %
3618%                                                                             %
3619%     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
3620%                                                                             %
3621%                                                                             %
3622%                                                                             %
3623%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3624*/
3625
3626static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3627  const double *kernel,const size_t width,const OffsetInfo *offset,
3628  ExceptionInfo *exception)
3629{
3630  CacheView
3631    *filteredImage_view,
3632    *image_view;
3633
3634  cl_command_queue
3635    queue;
3636
3637  cl_float4
3638    biasPixel;
3639
3640  cl_int
3641    clStatus;
3642
3643  cl_kernel
3644    motionBlurKernel;
3645
3646  cl_event
3647    event;
3648
3649  cl_mem
3650    filteredImageBuffer,
3651    imageBuffer,
3652    imageKernelBuffer,
3653    offsetBuffer;
3654
3655  cl_mem_flags
3656    mem_flags;
3657
3658  const void
3659    *inputPixels;
3660
3661  float
3662    *kernelBufferPtr;
3663
3664  Image
3665    *filteredImage;
3666
3667  int
3668    *offsetBufferPtr;
3669
3670  MagickBooleanType
3671    outputReady;
3672
3673  MagickCLDevice
3674    device;
3675
3676  PixelInfo
3677    bias;
3678
3679  MagickSizeType
3680    length;
3681
3682  size_t
3683    global_work_size[2],
3684    local_work_size[2];
3685
3686  unsigned int
3687    i,
3688    imageHeight,
3689    imageWidth,
3690    matte;
3691
3692  void
3693    *filteredPixels,
3694    *hostPtr;
3695
3696  outputReady = MagickFalse;
3697  filteredImage = NULL;
3698  filteredImage_view = NULL;
3699  imageBuffer = NULL;
3700  filteredImageBuffer = NULL;
3701  imageKernelBuffer = NULL;
3702  motionBlurKernel = NULL;
3703  queue = NULL;
3704
3705  device = RequestOpenCLDevice(clEnv);
3706
3707  /* Create and initialize OpenCL buffers. */
3708
3709  image_view=AcquireAuthenticCacheView(image,exception);
3710  inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3711  if (inputPixels == (const void *) NULL)
3712  {
3713    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3714      "UnableToReadPixelCache.","`%s'",image->filename);
3715    goto cleanup;
3716  }
3717
3718  // If the host pointer is aligned to the size of CLPixelPacket,
3719  // then use the host buffer directly from the GPU; otherwise,
3720  // create a buffer on the GPU and copy the data over
3721  if (ALIGNED(inputPixels,CLPixelPacket))
3722  {
3723    mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3724  }
3725  else
3726  {
3727    mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3728  }
3729  // create a CL buffer from image pixel buffer
3730  length = image->columns * image->rows;
3731  imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3732    length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3733  if (clStatus != CL_SUCCESS)
3734  {
3735    (void) ThrowMagickException(exception, GetMagickModule(),
3736      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3737    goto cleanup;
3738  }
3739
3740
3741  filteredImage = CloneImage(image,image->columns,image->rows,
3742    MagickTrue,exception);
3743  assert(filteredImage != NULL);
3744  if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3745  {
3746    (void) ThrowMagickException(exception, GetMagickModule(),
3747      ResourceLimitError, "CloneImage failed.", ".");
3748    goto cleanup;
3749  }
3750  filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3751  filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3752  if (filteredPixels == (void *) NULL)
3753  {
3754    (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3755      "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3756    goto cleanup;
3757  }
3758
3759  if (ALIGNED(filteredPixels,CLPixelPacket))
3760  {
3761    mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3762    hostPtr = filteredPixels;
3763  }
3764  else
3765  {
3766    mem_flags = CL_MEM_WRITE_ONLY;
3767    hostPtr = NULL;
3768  }
3769  // create a CL buffer from image pixel buffer
3770  length = image->columns * image->rows;
3771  filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3772    length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3773  if (clStatus != CL_SUCCESS)
3774  {
3775    (void) ThrowMagickException(exception, GetMagickModule(),
3776      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3777    goto cleanup;
3778  }
3779
3780
3781  imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3782    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3783    &clStatus);
3784  if (clStatus != CL_SUCCESS)
3785  {
3786    (void) ThrowMagickException(exception, GetMagickModule(),
3787      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3788    goto cleanup;
3789  }
3790
3791  queue = AcquireOpenCLCommandQueue(device);
3792  kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3793    CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3794  if (clStatus != CL_SUCCESS)
3795  {
3796    (void) ThrowMagickException(exception, GetMagickModule(),
3797      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3798    goto cleanup;
3799  }
3800  for (i = 0; i < width; i++)
3801  {
3802    kernelBufferPtr[i] = (float) kernel[i];
3803  }
3804  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3805    0, NULL, NULL);
3806 if (clStatus != CL_SUCCESS)
3807  {
3808    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3809      "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3810    goto cleanup;
3811  }
3812
3813  offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3814    CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3815    &clStatus);
3816  if (clStatus != CL_SUCCESS)
3817  {
3818    (void) ThrowMagickException(exception, GetMagickModule(),
3819      ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3820    goto cleanup;
3821  }
3822
3823  offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3824    CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3825  if (clStatus != CL_SUCCESS)
3826  {
3827    (void) ThrowMagickException(exception, GetMagickModule(),
3828      ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3829    goto cleanup;
3830  }
3831  for (i = 0; i < width; i++)
3832  {
3833    offsetBufferPtr[2*i] = (int)offset[i].x;
3834    offsetBufferPtr[2*i+1] = (int)offset[i].y;
3835  }
3836  clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3837    NULL, NULL);
3838 if (clStatus != CL_SUCCESS)
3839  {
3840    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3841      "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3842    goto cleanup;
3843  }
3844
3845
3846 // get the OpenCL kernel
3847  motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3848  if (motionBlurKernel == NULL)
3849  {
3850    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3851      "AcquireOpenCLKernel failed.", ".");
3852    goto cleanup;
3853  }
3854
3855  // set the kernel arguments
3856  i = 0;
3857  clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3858    (void *)&imageBuffer);
3859  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3860    (void *)&filteredImageBuffer);
3861  imageWidth = (unsigned int) image->columns;
3862  imageHeight = (unsigned int) image->rows;
3863  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3864    &imageWidth);
3865  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3866    &imageHeight);
3867  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3868    (void *)&imageKernelBuffer);
3869  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3870    &width);
3871  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3872    (void *)&offsetBuffer);
3873
3874  GetPixelInfo(image,&bias);
3875  biasPixel.s[0] = bias.red;
3876  biasPixel.s[1] = bias.green;
3877  biasPixel.s[2] = bias.blue;
3878  biasPixel.s[3] = bias.alpha;
3879  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3880
3881  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3882  matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3883  clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3884  if (clStatus != CL_SUCCESS)
3885  {
3886    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3887      "clEnv->library->clSetKernelArg failed.", ".");
3888    goto cleanup;
3889  }
3890
3891  // launch the kernel
3892  local_work_size[0] = 16;
3893  local_work_size[1] = 16;
3894  global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3895                                (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3896  global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3897                                (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3898  clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3899	  global_work_size, local_work_size, 0, NULL, &event);
3900
3901  if (clStatus != CL_SUCCESS)
3902  {
3903    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3904      "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3905    goto cleanup;
3906  }
3907  RecordProfileData(device,motionBlurKernel,event);
3908
3909  if (ALIGNED(filteredPixels,CLPixelPacket))
3910  {
3911    length = image->columns * image->rows;
3912    clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3913      CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3914      NULL, &clStatus);
3915  }
3916  else
3917  {
3918    length = image->columns * image->rows;
3919    clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3920      length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3921  }
3922  if (clStatus != CL_SUCCESS)
3923  {
3924    (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3925      "Reading output image from CL buffer failed.", ".");
3926    goto cleanup;
3927  }
3928  outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3929
3930cleanup:
3931
3932  image_view=DestroyCacheView(image_view);
3933  if (filteredImage_view != NULL)
3934    filteredImage_view=DestroyCacheView(filteredImage_view);
3935
3936  if (filteredImageBuffer!=NULL)
3937    clEnv->library->clReleaseMemObject(filteredImageBuffer);
3938  if (imageBuffer!=NULL)
3939    clEnv->library->clReleaseMemObject(imageBuffer);
3940  if (imageKernelBuffer!=NULL)
3941    clEnv->library->clReleaseMemObject(imageKernelBuffer);
3942  if (motionBlurKernel!=NULL)
3943    ReleaseOpenCLKernel(motionBlurKernel);
3944  if (queue != NULL)
3945    ReleaseOpenCLCommandQueue(device,queue);
3946  if (device != NULL)
3947    ReleaseOpenCLDevice(device);
3948  if (outputReady == MagickFalse && filteredImage != NULL)
3949    filteredImage=DestroyImage(filteredImage);
3950
3951  return(filteredImage);
3952}
3953
3954MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3955  const double* kernel,const size_t width,const OffsetInfo *offset,
3956  ExceptionInfo *exception)
3957{
3958  Image
3959    *filteredImage;
3960
3961  MagickCLEnv
3962    clEnv;
3963
3964  assert(image != NULL);
3965  assert(kernel != (double *) NULL);
3966  assert(offset != (OffsetInfo *) NULL);
3967  assert(exception != (ExceptionInfo *) NULL);
3968
3969  if (checkAccelerateConditionRGBA(image) == MagickFalse)
3970    return((Image *) NULL);
3971
3972  clEnv=getOpenCLEnvironment(exception);
3973  if (clEnv == (MagickCLEnv) NULL)
3974    return((Image *) NULL);
3975
3976  filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3977    exception);
3978  return(filteredImage);
3979}
3980
3981/*
3982%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3983%                                                                             %
3984%                                                                             %
3985%                                                                             %
3986%     A c c e l e r a t e R e s i z e I m a g e                               %
3987%                                                                             %
3988%                                                                             %
3989%                                                                             %
3990%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3991*/
3992
3993static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3994  cl_command_queue queue,const Image *image,Image *filteredImage,
3995  cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3996  cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3997  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3998  const float xFactor,ExceptionInfo *exception)
3999{
4000  cl_kernel
4001    horizontalKernel;
4002
4003  cl_int
4004    status;
4005
4006  const unsigned int
4007    workgroupSize = 256;
4008
4009  float
4010    resizeFilterScale,
4011    resizeFilterSupport,
4012    resizeFilterWindowSupport,
4013    resizeFilterBlur,
4014    scale,
4015    support;
4016
4017  int
4018    cacheRangeStart,
4019    cacheRangeEnd,
4020    numCachedPixels,
4021    resizeFilterType,
4022    resizeWindowType;
4023
4024  MagickBooleanType
4025    outputReady;
4026
4027  size_t
4028    gammaAccumulatorLocalMemorySize,
4029    gsize[2],
4030    i,
4031    imageCacheLocalMemorySize,
4032    pixelAccumulatorLocalMemorySize,
4033    lsize[2],
4034    totalLocalMemorySize,
4035    weightAccumulatorLocalMemorySize;
4036
4037  unsigned int
4038    chunkSize,
4039    pixelPerWorkgroup;
4040
4041  horizontalKernel=NULL;
4042  outputReady=MagickFalse;
4043
4044  /*
4045  Apply filter to resize vertically from image to resize image.
4046  */
4047  scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
4048  support=scale*GetResizeFilterSupport(resizeFilter);
4049  if (support < 0.5)
4050  {
4051    /*
4052    Support too small even for nearest neighbour: Reduce to point
4053    sampling.
4054    */
4055    support=(MagickRealType) 0.5;
4056    scale=1.0;
4057  }
4058  scale=PerceptibleReciprocal(scale);
4059
4060  if (resizedColumns < workgroupSize)
4061  {
4062    chunkSize=32;
4063    pixelPerWorkgroup=32;
4064  }
4065  else
4066  {
4067    chunkSize=workgroupSize;
4068    pixelPerWorkgroup=workgroupSize;
4069  }
4070
4071DisableMSCWarning(4127)
4072  while(1)
4073RestoreMSCWarning
4074  {
4075    /* calculate the local memory size needed per workgroup */
4076    cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
4077    cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
4078      MagickEpsilon)+support+0.5);
4079    numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4080    imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4081      number_channels;
4082    totalLocalMemorySize=imageCacheLocalMemorySize;
4083
4084    /* local size for the pixel accumulator */
4085    pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4086    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4087
4088    /* local memory size for the weight accumulator */
4089    weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4090    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4091
4092    /* local memory size for the gamma accumulator */
4093    if ((number_channels == 4) || (number_channels == 2))
4094      gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4095    else
4096      gammaAccumulatorLocalMemorySize=sizeof(float);
4097    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4098
4099    if (totalLocalMemorySize <= device->local_memory_size)
4100      break;
4101    else
4102    {
4103      pixelPerWorkgroup=pixelPerWorkgroup/2;
4104      chunkSize=chunkSize/2;
4105      if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4106      {
4107        /* quit, fallback to CPU */
4108        goto cleanup;
4109      }
4110    }
4111  }
4112
4113  resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4114  resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4115
4116  horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4117  if (horizontalKernel == (cl_kernel) NULL)
4118  {
4119    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4120      ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4121    goto cleanup;
4122  }
4123
4124  resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4125  resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4126  resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4127  resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4128
4129  i=0;
4130  status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4131  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4132  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4133  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4134  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4135  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4136  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4137  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4138  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4139  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4140  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4141  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4142  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4143  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4144  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4145  status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4146  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4147  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4148  status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4149  status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4150  status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4151  status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4152
4153  if (status != CL_SUCCESS)
4154  {
4155    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4156      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4157    goto cleanup;
4158  }
4159
4160  gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4161    workgroupSize;
4162  gsize[1]=resizedRows;
4163  lsize[0]=workgroupSize;
4164  lsize[1]=1;
4165  outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4166    (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4167cleanup:
4168
4169  if (horizontalKernel != (cl_kernel) NULL)
4170    ReleaseOpenCLKernel(horizontalKernel);
4171
4172  return(outputReady);
4173}
4174
4175static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4176  cl_command_queue queue,const Image *image,Image * filteredImage,
4177  cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4178  cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4179  const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4180  const float yFactor,ExceptionInfo *exception)
4181{
4182  cl_kernel
4183    verticalKernel;
4184
4185  cl_int
4186    status;
4187
4188  const unsigned int
4189    workgroupSize = 256;
4190
4191  float
4192    resizeFilterScale,
4193    resizeFilterSupport,
4194    resizeFilterWindowSupport,
4195    resizeFilterBlur,
4196    scale,
4197    support;
4198
4199  int
4200    cacheRangeStart,
4201    cacheRangeEnd,
4202    numCachedPixels,
4203    resizeFilterType,
4204    resizeWindowType;
4205
4206  MagickBooleanType
4207    outputReady;
4208
4209  size_t
4210    gammaAccumulatorLocalMemorySize,
4211    gsize[2],
4212    i,
4213    imageCacheLocalMemorySize,
4214    pixelAccumulatorLocalMemorySize,
4215    lsize[2],
4216    totalLocalMemorySize,
4217    weightAccumulatorLocalMemorySize;
4218
4219  unsigned int
4220    chunkSize,
4221    pixelPerWorkgroup;
4222
4223  verticalKernel=NULL;
4224  outputReady=MagickFalse;
4225
4226  /*
4227  Apply filter to resize vertically from image to resize image.
4228  */
4229  scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4230  support=scale*GetResizeFilterSupport(resizeFilter);
4231  if (support < 0.5)
4232  {
4233    /*
4234    Support too small even for nearest neighbour: Reduce to point
4235    sampling.
4236    */
4237    support=(MagickRealType) 0.5;
4238    scale=1.0;
4239  }
4240  scale=PerceptibleReciprocal(scale);
4241
4242  if (resizedRows < workgroupSize)
4243  {
4244    chunkSize=32;
4245    pixelPerWorkgroup=32;
4246  }
4247  else
4248  {
4249    chunkSize=workgroupSize;
4250    pixelPerWorkgroup=workgroupSize;
4251  }
4252
4253DisableMSCWarning(4127)
4254  while(1)
4255RestoreMSCWarning
4256  {
4257    /* calculate the local memory size needed per workgroup */
4258    cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4259    cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4260      MagickEpsilon)+support+0.5);
4261    numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4262    imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4263      number_channels;
4264    totalLocalMemorySize=imageCacheLocalMemorySize;
4265
4266    /* local size for the pixel accumulator */
4267    pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4268    totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4269
4270    /* local memory size for the weight accumulator */
4271    weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4272    totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4273
4274    /* local memory size for the gamma accumulator */
4275    if ((number_channels == 4) || (number_channels == 2))
4276      gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4277    else
4278      gammaAccumulatorLocalMemorySize=sizeof(float);
4279    totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4280
4281    if (totalLocalMemorySize <= device->local_memory_size)
4282      break;
4283    else
4284    {
4285      pixelPerWorkgroup=pixelPerWorkgroup/2;
4286      chunkSize=chunkSize/2;
4287      if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4288      {
4289        /* quit, fallback to CPU */
4290        goto cleanup;
4291      }
4292    }
4293  }
4294
4295  resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4296  resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4297
4298  verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4299  if (verticalKernel == (cl_kernel) NULL)
4300  {
4301    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4302      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4303    goto cleanup;
4304  }
4305
4306  resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4307  resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4308  resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4309  resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4310
4311  i=0;
4312  status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4313  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4314  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4315  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4316  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4317  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4318  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4319  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4320  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4321  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4322  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4323  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4324  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4325  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4326  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4327  status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4328  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4329  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4330  status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4331  status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4332  status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4333  status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4334
4335  if (status != CL_SUCCESS)
4336  {
4337    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4338      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4339    goto cleanup;
4340  }
4341
4342  gsize[0]=resizedColumns;
4343  gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4344    workgroupSize;
4345  lsize[0]=1;
4346  lsize[1]=workgroupSize;
4347  outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4348    gsize,lsize,image,filteredImage,exception);
4349
4350cleanup:
4351
4352  if (verticalKernel != (cl_kernel) NULL)
4353    ReleaseOpenCLKernel(verticalKernel);
4354
4355  return(outputReady);
4356}
4357
4358static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4359  const size_t resizedColumns,const size_t resizedRows,
4360  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4361{
4362  cl_command_queue
4363    queue;
4364
4365  cl_mem
4366    cubicCoefficientsBuffer,
4367    filteredImageBuffer,
4368    imageBuffer,
4369    tempImageBuffer;
4370
4371  cl_uint
4372    number_channels;
4373
4374  const double
4375    *resizeFilterCoefficient;
4376
4377  float
4378    coefficientBuffer[7],
4379    xFactor,
4380    yFactor;
4381
4382  MagickBooleanType
4383    outputReady;
4384
4385  MagickCLDevice
4386    device;
4387
4388  MagickSizeType
4389    length;
4390
4391  Image
4392    *filteredImage;
4393
4394  size_t
4395    i;
4396
4397  filteredImage=NULL;
4398  tempImageBuffer=NULL;
4399  cubicCoefficientsBuffer=NULL;
4400  outputReady=MagickFalse;
4401
4402  device=RequestOpenCLDevice(clEnv);
4403  queue=AcquireOpenCLCommandQueue(device);
4404  filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4405    exception);
4406  if (filteredImage == (Image *) NULL)
4407    goto cleanup;
4408  if (filteredImage->number_channels != image->number_channels)
4409    goto cleanup;
4410  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4411  if (imageBuffer == (cl_mem) NULL)
4412    goto cleanup;
4413  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4414  if (filteredImageBuffer == (cl_mem) NULL)
4415    goto cleanup;
4416
4417  resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4418  for (i = 0; i < 7; i++)
4419    coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4420  cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4421    CL_MEM_READ_ONLY,7*sizeof(*resizeFilterCoefficient),&coefficientBuffer);
4422  if (cubicCoefficientsBuffer == (cl_mem) NULL)
4423  {
4424    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4425      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4426    goto cleanup;
4427  }
4428
4429  number_channels=(cl_uint) image->number_channels;
4430  xFactor=(float) resizedColumns/(float) image->columns;
4431  yFactor=(float) resizedRows/(float) image->rows;
4432  if (xFactor > yFactor)
4433  {
4434    length=resizedColumns*image->rows*number_channels;
4435    tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4436      sizeof(CLQuantum),(void *) NULL);
4437    if (tempImageBuffer == (cl_mem) NULL)
4438    {
4439      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4440        ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4441      goto cleanup;
4442    }
4443
4444    outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4445      imageBuffer,number_channels,(cl_uint) image->columns,
4446      (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4447      (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4448      exception);
4449    if (outputReady == MagickFalse)
4450      goto cleanup;
4451
4452    outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4453      tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4454      (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4455      (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4456      exception);
4457    if (outputReady == MagickFalse)
4458      goto cleanup;
4459  }
4460  else
4461  {
4462    length=image->columns*resizedRows*number_channels;
4463    tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4464      sizeof(CLQuantum),(void *) NULL);
4465    if (tempImageBuffer == (cl_mem) NULL)
4466    {
4467      (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4468        ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4469      goto cleanup;
4470    }
4471
4472    outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4473      imageBuffer,number_channels,(cl_uint) image->columns,
4474      (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4475      (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4476      exception);
4477    if (outputReady == MagickFalse)
4478      goto cleanup;
4479
4480    outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4481      tempImageBuffer,number_channels,(cl_uint) image->columns,
4482      (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4483      (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4484      exception);
4485    if (outputReady == MagickFalse)
4486      goto cleanup;
4487  }
4488
4489cleanup:
4490
4491  if (tempImageBuffer != (cl_mem) NULL)
4492    ReleaseOpenCLMemObject(tempImageBuffer);
4493  if (cubicCoefficientsBuffer != (cl_mem) NULL)
4494    ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4495  if (queue != (cl_command_queue) NULL)
4496    ReleaseOpenCLCommandQueue(device,queue);
4497  if (device != (MagickCLDevice) NULL)
4498    ReleaseOpenCLDevice(device);
4499  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4500    filteredImage=DestroyImage(filteredImage);
4501
4502  return(filteredImage);
4503}
4504
4505static MagickBooleanType gpuSupportedResizeWeighting(
4506  ResizeWeightingFunctionType f)
4507{
4508  unsigned int
4509    i;
4510
4511  for (i = 0; ;i++)
4512  {
4513    if (supportedResizeWeighting[i] == LastWeightingFunction)
4514      break;
4515    if (supportedResizeWeighting[i] == f)
4516      return(MagickTrue);
4517  }
4518  return(MagickFalse);
4519}
4520
4521MagickPrivate Image *AccelerateResizeImage(const Image *image,
4522  const size_t resizedColumns,const size_t resizedRows,
4523  const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4524{
4525  Image
4526    *filteredImage;
4527
4528  MagickCLEnv
4529    clEnv;
4530
4531  assert(image != NULL);
4532  assert(exception != (ExceptionInfo *) NULL);
4533
4534  if (checkAccelerateCondition(image) == MagickFalse)
4535    return((Image *) NULL);
4536
4537  if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4538         resizeFilter)) == MagickFalse) ||
4539      (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4540         resizeFilter)) == MagickFalse))
4541    return((Image *) NULL);
4542
4543  clEnv=getOpenCLEnvironment(exception);
4544  if (clEnv == (MagickCLEnv) NULL)
4545    return((Image *) NULL);
4546
4547  filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4548    resizeFilter,exception);
4549  return(filteredImage);
4550}
4551
4552/*
4553%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4554%                                                                             %
4555%                                                                             %
4556%                                                                             %
4557%     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
4558%                                                                             %
4559%                                                                             %
4560%                                                                             %
4561%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4562*/
4563
4564static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4565  const double angle,ExceptionInfo *exception)
4566{
4567  cl_command_queue
4568    queue;
4569
4570  cl_float2
4571    blurCenter;
4572
4573  cl_int
4574    status;
4575
4576  cl_mem
4577    cosThetaBuffer,
4578    filteredImageBuffer,
4579    imageBuffer,
4580    sinThetaBuffer;
4581
4582  cl_kernel
4583    rotationalBlurKernel;
4584
4585  cl_uint
4586    cossin_theta_size,
4587    number_channels;
4588
4589  float
4590    blurRadius,
4591    *cosThetaPtr,
4592    offset,
4593    *sinThetaPtr,
4594    theta;
4595
4596  Image
4597    *filteredImage;
4598
4599  MagickBooleanType
4600    outputReady;
4601
4602  MagickCLDevice
4603    device;
4604
4605  size_t
4606    gsize[2],
4607    i;
4608
4609  filteredImage=NULL;
4610  sinThetaBuffer=NULL;
4611  cosThetaBuffer=NULL;
4612  rotationalBlurKernel=NULL;
4613  outputReady=MagickFalse;
4614
4615  device=RequestOpenCLDevice(clEnv);
4616  queue=AcquireOpenCLCommandQueue(device);
4617  filteredImage=cloneImage(image,exception);
4618  if (filteredImage == (Image *) NULL)
4619    goto cleanup;
4620  if (filteredImage->number_channels != image->number_channels)
4621    goto cleanup;
4622  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4623  if (imageBuffer == (cl_mem) NULL)
4624    goto cleanup;
4625  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4626  if (filteredImageBuffer == (cl_mem) NULL)
4627    goto cleanup;
4628
4629  blurCenter.x=(float) (image->columns-1)/2.0;
4630  blurCenter.y=(float) (image->rows-1)/2.0;
4631  blurRadius=hypot(blurCenter.x,blurCenter.y);
4632  cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4633    (double) blurRadius)+2UL);
4634
4635  cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4636  if (cosThetaPtr == (float *) NULL)
4637    goto cleanup;
4638  sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4639  if (sinThetaPtr == (float *) NULL)
4640  {
4641    cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4642    goto cleanup;
4643  }
4644
4645  theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
4646  offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
4647  for (i=0; i < (ssize_t) cossin_theta_size; i++)
4648  {
4649    cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4650    sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4651  }
4652
4653  sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4654    CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4655  sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4656  cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4657    CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4658  cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4659  if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4660  {
4661    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4662      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4663    goto cleanup;
4664  }
4665
4666  rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4667  if (rotationalBlurKernel == (cl_kernel) NULL)
4668  {
4669    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4670      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4671    goto cleanup;
4672  }
4673
4674  number_channels=(cl_uint) image->number_channels;
4675
4676  i=0;
4677  status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4678  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4679  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4680  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4681  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4682  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4683  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4684  status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4685  if (status != CL_SUCCESS)
4686  {
4687    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4688      ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4689    goto cleanup;
4690  }
4691
4692  gsize[0]=image->columns;
4693  gsize[1]=image->rows;
4694  outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4695    (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4696    exception);
4697
4698cleanup:
4699
4700  if (sinThetaBuffer != (cl_mem) NULL)
4701    ReleaseOpenCLMemObject(sinThetaBuffer);
4702  if (cosThetaBuffer != (cl_mem) NULL)
4703    ReleaseOpenCLMemObject(cosThetaBuffer);
4704  if (rotationalBlurKernel != (cl_kernel) NULL)
4705    ReleaseOpenCLKernel(rotationalBlurKernel);
4706  if (queue != (cl_command_queue) NULL)
4707    ReleaseOpenCLCommandQueue(device,queue);
4708  if (device != (MagickCLDevice) NULL)
4709    ReleaseOpenCLDevice(device);
4710  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4711    filteredImage=DestroyImage(filteredImage);
4712
4713  return(filteredImage);
4714}
4715
4716MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4717  const double angle,ExceptionInfo *exception)
4718{
4719  Image
4720    *filteredImage;
4721
4722  MagickCLEnv
4723    clEnv;
4724
4725  assert(image != NULL);
4726  assert(exception != (ExceptionInfo *) NULL);
4727
4728  if (checkAccelerateCondition(image) == MagickFalse)
4729    return((Image *) NULL);
4730
4731  clEnv=getOpenCLEnvironment(exception);
4732  if (clEnv == (MagickCLEnv) NULL)
4733    return((Image *) NULL);
4734
4735  filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4736  return filteredImage;
4737}
4738
4739/*
4740%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4741%                                                                             %
4742%                                                                             %
4743%                                                                             %
4744%     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
4745%                                                                             %
4746%                                                                             %
4747%                                                                             %
4748%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4749*/
4750
4751static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4752  const double radius,const double sigma,const double gain,
4753  const double threshold,ExceptionInfo *exception)
4754{
4755  cl_command_queue
4756    queue;
4757
4758  cl_int
4759    status;
4760
4761  cl_kernel
4762    blurRowKernel,
4763    unsharpMaskBlurColumnKernel;
4764
4765  cl_mem
4766    filteredImageBuffer,
4767    imageBuffer,
4768    imageKernelBuffer,
4769    tempImageBuffer;
4770
4771  cl_uint
4772    imageColumns,
4773    imageRows,
4774    kernelWidth,
4775    number_channels;
4776
4777  float
4778    fGain,
4779    fThreshold;
4780
4781  Image
4782    *filteredImage;
4783
4784  int
4785    chunkSize;
4786
4787  MagickBooleanType
4788    outputReady;
4789
4790  MagickCLDevice
4791    device;
4792
4793  MagickSizeType
4794    length;
4795
4796  size_t
4797    gsize[2],
4798    i,
4799    lsize[2];
4800
4801  filteredImage=NULL;
4802  tempImageBuffer=NULL;
4803  imageKernelBuffer=NULL;
4804  blurRowKernel=NULL;
4805  unsharpMaskBlurColumnKernel=NULL;
4806  outputReady=MagickFalse;
4807
4808  device=RequestOpenCLDevice(clEnv);
4809  queue=AcquireOpenCLCommandQueue(device);
4810  filteredImage=cloneImage(image,exception);
4811  if (filteredImage == (Image *) NULL)
4812    goto cleanup;
4813  if (filteredImage->number_channels != image->number_channels)
4814    goto cleanup;
4815  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4816  if (imageBuffer == (cl_mem) NULL)
4817    goto cleanup;
4818  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4819  if (filteredImageBuffer == (cl_mem) NULL)
4820    goto cleanup;
4821
4822  imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4823    exception);
4824
4825  length=image->columns*image->rows;
4826  tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4827    sizeof(cl_float4),NULL);
4828  if (tempImageBuffer == (cl_mem) NULL)
4829  {
4830    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4831      ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4832    goto cleanup;
4833  }
4834
4835  blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4836  if (blurRowKernel == (cl_kernel) NULL)
4837  {
4838    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4839      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4840    goto cleanup;
4841  }
4842
4843  unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4844    "UnsharpMaskBlurColumn");
4845  if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4846  {
4847    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4848      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4849    goto cleanup;
4850  }
4851
4852  number_channels=(cl_uint) image->number_channels;
4853  imageColumns=(cl_uint) image->columns;
4854  imageRows=(cl_uint) image->rows;
4855
4856  chunkSize = 256;
4857
4858  i=0;
4859  status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4860  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4861  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4862  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4863  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4864  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4865  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4866  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4867  status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4868  if (status != CL_SUCCESS)
4869  {
4870    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4871      ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4872    goto cleanup;
4873  }
4874
4875  gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4876  gsize[1]=image->rows;
4877  lsize[0]=chunkSize;
4878  lsize[1]=1;
4879  outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4880    (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4881
4882  chunkSize=256;
4883  fGain=(float) gain;
4884  fThreshold=(float) threshold;
4885
4886  i=0;
4887  status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4888  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4889  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4890  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4891  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4892  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4893  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4894  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4895  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4896  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4897  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4898  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4899  status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4900  if (status != CL_SUCCESS)
4901  {
4902    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4903      ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4904    goto cleanup;
4905  }
4906
4907  gsize[0]=image->columns;
4908  gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4909  lsize[0]=1;
4910  lsize[1]=chunkSize;
4911  outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4912    (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4913
4914cleanup:
4915
4916  if (tempImageBuffer != (cl_mem) NULL)
4917    ReleaseOpenCLMemObject(tempImageBuffer);
4918  if (imageKernelBuffer != (cl_mem) NULL)
4919    ReleaseOpenCLMemObject(imageKernelBuffer);
4920  if (blurRowKernel != (cl_kernel) NULL)
4921    ReleaseOpenCLKernel(blurRowKernel);
4922  if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4923    ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4924  if (queue != (cl_command_queue) NULL)
4925    ReleaseOpenCLCommandQueue(device,queue);
4926  if (device != (MagickCLDevice) NULL)
4927    ReleaseOpenCLDevice(device);
4928  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4929    filteredImage=DestroyImage(filteredImage);
4930
4931  return(filteredImage);
4932}
4933
4934static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4935  MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4936  const double threshold,ExceptionInfo *exception)
4937{
4938  cl_command_queue
4939    queue;
4940
4941  cl_int
4942    status;
4943
4944  cl_kernel
4945    unsharpMaskKernel;
4946
4947  cl_mem
4948    filteredImageBuffer,
4949    imageBuffer,
4950    imageKernelBuffer;
4951
4952  cl_uint
4953    imageColumns,
4954    imageRows,
4955    kernelWidth,
4956    number_channels;
4957
4958  float
4959    fGain,
4960    fThreshold;
4961
4962  Image
4963    *filteredImage;
4964
4965  MagickBooleanType
4966    outputReady;
4967
4968  MagickCLDevice
4969    device;
4970
4971  size_t
4972    gsize[2],
4973    i,
4974    lsize[2];
4975
4976  filteredImage=NULL;
4977  imageKernelBuffer=NULL;
4978  unsharpMaskKernel=NULL;
4979  outputReady=MagickFalse;
4980
4981  device=RequestOpenCLDevice(clEnv);
4982  queue=AcquireOpenCLCommandQueue(device);
4983  filteredImage=cloneImage(image,exception);
4984  if (filteredImage == (Image *) NULL)
4985    goto cleanup;
4986  if (filteredImage->number_channels != image->number_channels)
4987    goto cleanup;
4988  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4989  if (imageBuffer == (cl_mem) NULL)
4990    goto cleanup;
4991  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4992  if (filteredImageBuffer == (cl_mem) NULL)
4993    goto cleanup;
4994
4995  imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4996    exception);
4997
4998  unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4999  if (unsharpMaskKernel == NULL)
5000  {
5001    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5002      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5003    goto cleanup;
5004  }
5005
5006  imageColumns=(cl_uint) image->columns;
5007  imageRows=(cl_uint) image->rows;
5008  number_channels=(cl_uint) image->number_channels;
5009  fGain=(float) gain;
5010  fThreshold=(float) threshold;
5011
5012  i=0;
5013  status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5014  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5015  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
5016  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
5017  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
5018  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
5019  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
5020  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
5021  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
5022  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
5023  status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5024  if (status != CL_SUCCESS)
5025  {
5026    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5027      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5028    goto cleanup;
5029  }
5030
5031  gsize[0]=((image->columns + 7) / 8)*8;
5032  gsize[1]=((image->rows + 31) / 32)*32;
5033  lsize[0]=8;
5034  lsize[1]=32;
5035  outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
5036    gsize,lsize,image,filteredImage,exception);
5037
5038cleanup:
5039
5040  if (imageKernelBuffer != (cl_mem) NULL)
5041    ReleaseOpenCLMemObject(imageKernelBuffer);
5042  if (unsharpMaskKernel != (cl_kernel) NULL)
5043    ReleaseOpenCLKernel(unsharpMaskKernel);
5044  if (queue != (cl_command_queue) NULL)
5045    ReleaseOpenCLCommandQueue(device,queue);
5046  if (device != (MagickCLDevice) NULL)
5047    ReleaseOpenCLDevice(device);
5048  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5049    filteredImage=DestroyImage(filteredImage);
5050
5051  return(filteredImage);
5052}
5053
5054MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5055  const double radius,const double sigma,const double gain,
5056  const double threshold,ExceptionInfo *exception)
5057{
5058  Image
5059    *filteredImage;
5060
5061  MagickCLEnv
5062    clEnv;
5063
5064  assert(image != NULL);
5065  assert(exception != (ExceptionInfo *) NULL);
5066
5067  if (checkAccelerateCondition(image) == MagickFalse)
5068    return((Image *) NULL);
5069
5070  clEnv=getOpenCLEnvironment(exception);
5071  if (clEnv == (MagickCLEnv) NULL)
5072    return((Image *) NULL);
5073
5074  if (radius < 12.1)
5075    filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5076      threshold,exception);
5077  else
5078    filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5079      threshold,exception);
5080  return(filteredImage);
5081}
5082
5083static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5084  const double threshold,ExceptionInfo *exception)
5085{
5086  cl_command_queue
5087    queue;
5088
5089  const cl_int
5090    PASSES=5;
5091
5092  const int
5093    TILESIZE=64,
5094    PAD=1<<(PASSES-1),
5095    SIZE=TILESIZE-2*PAD;
5096
5097  cl_float
5098    thresh;
5099
5100  cl_int
5101    status;
5102
5103  cl_kernel
5104    denoiseKernel;
5105
5106  cl_mem
5107    filteredImageBuffer,
5108    imageBuffer;
5109
5110  cl_uint
5111    number_channels,
5112    width,
5113    height,
5114    max_channels;
5115
5116  Image
5117    *filteredImage;
5118
5119  MagickBooleanType
5120    outputReady;
5121
5122  MagickCLDevice
5123    device;
5124
5125  size_t
5126    gsize[2],
5127    i,
5128    lsize[2];
5129
5130  filteredImage=NULL;
5131  denoiseKernel=NULL;
5132  outputReady=MagickFalse;
5133
5134  device=RequestOpenCLDevice(clEnv);
5135  queue=AcquireOpenCLCommandQueue(device);
5136  filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
5137    exception);
5138  if (filteredImage == (Image *) NULL)
5139    goto cleanup;
5140  if (filteredImage->number_channels != image->number_channels)
5141    goto cleanup;
5142  imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5143  if (imageBuffer == (cl_mem) NULL)
5144    goto cleanup;
5145  filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5146  if (filteredImageBuffer == (cl_mem) NULL)
5147    goto cleanup;
5148
5149  denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5150  if (denoiseKernel == (cl_kernel) NULL)
5151  {
5152    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5153      ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5154    goto cleanup;
5155  }
5156
5157  number_channels=(cl_uint)image->number_channels;
5158  width=(cl_uint)image->columns;
5159  height=(cl_uint)image->rows;
5160  max_channels=number_channels;
5161  if ((max_channels == 4) || (max_channels == 2))
5162    max_channels=max_channels-1;
5163  thresh=threshold;
5164
5165  i=0;
5166  status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5167  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5168  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5169  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5170  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5171  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5172  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5173  status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5174  if (status != CL_SUCCESS)
5175  {
5176    (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5177      ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5178    goto cleanup;
5179  }
5180
5181  gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5182  gsize[1]=((height+(SIZE-1))/SIZE)*4;
5183  lsize[0]=TILESIZE;
5184  lsize[1]=4;
5185  outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,(const size_t *) NULL,
5186    gsize,lsize,image,filteredImage,exception);
5187
5188cleanup:
5189
5190  if (denoiseKernel != (cl_kernel) NULL)
5191    ReleaseOpenCLKernel(denoiseKernel);
5192  if (queue != (cl_command_queue) NULL)
5193    ReleaseOpenCLCommandQueue(device,queue);
5194  if (device != (MagickCLDevice) NULL)
5195    ReleaseOpenCLDevice(device);
5196  if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5197    filteredImage=DestroyImage(filteredImage);
5198
5199  return(filteredImage);
5200}
5201
5202MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5203  const double threshold,ExceptionInfo *exception)
5204{
5205  Image
5206    *filteredImage;
5207
5208  MagickCLEnv
5209    clEnv;
5210
5211  assert(image != NULL);
5212  assert(exception != (ExceptionInfo *)NULL);
5213
5214  if (checkAccelerateCondition(image) == MagickFalse)
5215    return((Image *) NULL);
5216
5217  clEnv=getOpenCLEnvironment(exception);
5218  if (clEnv == (MagickCLEnv) NULL)
5219    return((Image *) NULL);
5220
5221  filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5222
5223  return(filteredImage);
5224}
5225#endif /* MAGICKCORE_OPENCL_SUPPORT */
5226