1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                          License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16// Third party copyrights are property of their respective owners.
17//
18// Redistribution and use in source and binary forms, with or without modification,
19// are permitted provided that the following conditions are met:
20//
21//   * Redistribution's of source code must retain the above copyright notice,
22//     this list of conditions and the following disclaimer.
23//
24//   * Redistribution's in binary form must reproduce the above copyright notice,
25//     this list of conditions and the following disclaimer in the documentation
26//     and/or other materials provided with the distribution.
27//
28//   * The name of the copyright holders may not be used to endorse or promote products
29//     derived from this software without specific prior written permission.
30//
31// This software is provided by the copyright holders and contributors "as is" and
32// any express or implied warranties, including, but not limited to, the implied
33// warranties of merchantability and fitness for a particular purpose are disclaimed.
34// In no event shall the Intel Corporation or contributors be liable for any direct,
35// indirect, incidental, special, exemplary, or consequential damages
36// (including, but not limited to, procurement of substitute goods or services;
37// loss of use, data, or profits; or business interruption) however caused
38// and on any theory of liability, whether in contract, strict liability,
39// or tort (including negligence or otherwise) arising in any way out of
40// the use of this software, even if advised of the possibility of such damage.
41//
42//M*/
43
44#include "precomp.hpp"
45
46using namespace cv;
47using namespace cv::cuda;
48using namespace cv::cudacodec;
49
50#if !defined(HAVE_NVCUVID) || !defined(WIN32)
51
52cv::cudacodec::EncoderParams::EncoderParams() { throw_no_cuda(); }
53cv::cudacodec::EncoderParams::EncoderParams(const String&) { throw_no_cuda(); }
54void cv::cudacodec::EncoderParams::load(const String&) { throw_no_cuda(); }
55void cv::cudacodec::EncoderParams::save(const String&) const { throw_no_cuda(); }
56
57Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }
58Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }
59
60Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }
61Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr<VideoWriter>(); }
62
#else // defined(HAVE_NVCUVID) && defined(WIN32)
64
65void RGB_to_YV12(const GpuMat& src, GpuMat& dst);
66
67///////////////////////////////////////////////////////////////////////////
68// VideoWriterImpl
69
70namespace
71{
72    class NVEncoderWrapper
73    {
74    public:
75        NVEncoderWrapper() : encoder_(0)
76        {
77            int err;
78
79            err = NVGetHWEncodeCaps();
80            if (err)
81                CV_Error(Error::GpuNotSupported, "No CUDA capability present");
82
83            // Create the Encoder API Interface
84            err = NVCreateEncoder(&encoder_);
85            CV_Assert( err == 0 );
86        }
87
88        ~NVEncoderWrapper()
89        {
90            if (encoder_)
91                NVDestroyEncoder(encoder_);
92        }
93
94        operator NVEncoder() const
95        {
96            return encoder_;
97        }
98
99    private:
100        NVEncoder encoder_;
101    };
102
103    enum CodecType
104    {
105        MPEG1, // not supported yet
106        MPEG2, // not supported yet
107        MPEG4, // not supported yet
108        H264
109    };
110
111    class VideoWriterImpl : public VideoWriter
112    {
113    public:
114        VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec = H264);
115        VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec = H264);
116
117        void write(InputArray frame, bool lastFrame = false);
118
119        EncoderParams getEncoderParams() const;
120
121    private:
122        void initEncoder(double fps);
123        void setEncodeParams(const EncoderParams& params);
124        void initGpuMemory();
125        void initCallBacks();
126        void createHWEncoder();
127
128        Ptr<EncoderCallBack> callback_;
129        Size frameSize_;
130
131        CodecType codec_;
132        SurfaceFormat inputFormat_;
133        NVVE_SurfaceFormat surfaceFormat_;
134
135        NVEncoderWrapper encoder_;
136
137        GpuMat videoFrame_;
138        CUvideoctxlock cuCtxLock_;
139
140        // CallBacks
141
142        static unsigned char* NVENCAPI HandleAcquireBitStream(int* pBufferSize, void* pUserdata);
143        static void NVENCAPI HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata);
144        static void NVENCAPI HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata);
145        static void NVENCAPI HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata);
146    };
147
148    VideoWriterImpl::VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec) :
149        callback_(callback),
150        frameSize_(frameSize),
151        codec_(codec),
152        inputFormat_(format),
153        cuCtxLock_(0)
154    {
155        surfaceFormat_ = (inputFormat_ == SF_BGR ? YV12 : static_cast<NVVE_SurfaceFormat>(inputFormat_));
156
157        initEncoder(fps);
158
159        initGpuMemory();
160
161        initCallBacks();
162
163        createHWEncoder();
164    }
165
166    VideoWriterImpl::VideoWriterImpl(const Ptr<EncoderCallBack>& callback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec) :
167        callback_(callback),
168        frameSize_(frameSize),
169        codec_(codec),
170        inputFormat_(format),
171        cuCtxLock_(0)
172    {
173        surfaceFormat_ = (inputFormat_ == SF_BGR ? YV12 : static_cast<NVVE_SurfaceFormat>(inputFormat_));
174
175        initEncoder(fps);
176
177        setEncodeParams(params);
178
179        initGpuMemory();
180
181        initCallBacks();
182
183        createHWEncoder();
184    }
185
186    void VideoWriterImpl::initEncoder(double fps)
187    {
188        int err;
189
190        // Set codec
191
192        static const unsigned long codecs_id[] =
193        {
194            NV_CODEC_TYPE_MPEG1, NV_CODEC_TYPE_MPEG2, NV_CODEC_TYPE_MPEG4, NV_CODEC_TYPE_H264, NV_CODEC_TYPE_VC1
195        };
196        err = NVSetCodec(encoder_, codecs_id[codec_]);
197        if (err)
198            CV_Error(Error::StsNotImplemented, "Codec format is not supported");
199
200        // Set default params
201
202        err = NVSetDefaultParam(encoder_);
203        CV_Assert( err == 0 );
204
205        // Set some common params
206
207        int inputSize[] = { frameSize_.width, frameSize_.height };
208        err = NVSetParamValue(encoder_, NVVE_IN_SIZE, &inputSize);
209        CV_Assert( err == 0 );
210        err = NVSetParamValue(encoder_, NVVE_OUT_SIZE, &inputSize);
211        CV_Assert( err == 0 );
212
213        int aspectRatio[] = { frameSize_.width, frameSize_.height, ASPECT_RATIO_DAR };
214        err = NVSetParamValue(encoder_, NVVE_ASPECT_RATIO, &aspectRatio);
215        CV_Assert( err == 0 );
216
217        // FPS
218
219        int frame_rate = static_cast<int>(fps + 0.5);
220        int frame_rate_base = 1;
221        while (fabs(static_cast<double>(frame_rate) / frame_rate_base) - fps > 0.001)
222        {
223            frame_rate_base *= 10;
224            frame_rate = static_cast<int>(fps*frame_rate_base + 0.5);
225        }
226        int FrameRate[] = { frame_rate, frame_rate_base };
227        err = NVSetParamValue(encoder_, NVVE_FRAME_RATE, &FrameRate);
228        CV_Assert( err == 0 );
229
230        // Select device for encoding
231
232        int gpuID = getDevice();
233        err = NVSetParamValue(encoder_, NVVE_FORCE_GPU_SELECTION, &gpuID);
234        CV_Assert( err == 0 );
235    }
236
237    void VideoWriterImpl::setEncodeParams(const EncoderParams& params)
238    {
239        int err;
240
241        int P_Interval = params.P_Interval;
242        err = NVSetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval);
243        CV_Assert( err == 0 );
244
245        int IDR_Period = params.IDR_Period;
246        err = NVSetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period);
247        CV_Assert( err == 0 );
248
249        int DynamicGOP = params.DynamicGOP;
250        err = NVSetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP);
251        CV_Assert( err == 0 );
252
253        NVVE_RateCtrlType RCType = static_cast<NVVE_RateCtrlType>(params.RCType);
254        err = NVSetParamValue(encoder_, NVVE_RC_TYPE, &RCType);
255        CV_Assert( err == 0 );
256
257        int AvgBitrate = params.AvgBitrate;
258        err = NVSetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate);
259        CV_Assert( err == 0 );
260
261        int PeakBitrate = params.PeakBitrate;
262        err = NVSetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate);
263        CV_Assert( err == 0 );
264
265        int QP_Level_Intra = params.QP_Level_Intra;
266        err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra);
267        CV_Assert( err == 0 );
268
269        int QP_Level_InterP = params.QP_Level_InterP;
270        err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP);
271        CV_Assert( err == 0 );
272
273        int QP_Level_InterB = params.QP_Level_InterB;
274        err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB);
275        CV_Assert( err == 0 );
276
277        int DeblockMode = params.DeblockMode;
278        err = NVSetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode);
279        CV_Assert( err == 0 );
280
281        int ProfileLevel = params.ProfileLevel;
282        err = NVSetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel);
283        CV_Assert( err == 0 );
284
285        int ForceIntra = params.ForceIntra;
286        err = NVSetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra);
287        CV_Assert( err == 0 );
288
289        int ForceIDR = params.ForceIDR;
290        err = NVSetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR);
291        CV_Assert( err == 0 );
292
293        int ClearStat = params.ClearStat;
294        err = NVSetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat);
295        CV_Assert( err == 0 );
296
297        NVVE_DI_MODE DIMode = static_cast<NVVE_DI_MODE>(params.DIMode);
298        err = NVSetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode);
299        CV_Assert( err == 0 );
300
301        if (params.Presets != -1)
302        {
303            NVVE_PRESETS_TARGET Presets = static_cast<NVVE_PRESETS_TARGET>(params.Presets);
304            err = NVSetParamValue(encoder_, NVVE_PRESETS, &Presets);
305            CV_Assert( err == 0 );
306        }
307
308        int DisableCabac = params.DisableCabac;
309        err = NVSetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac);
310        CV_Assert( err == 0 );
311
312        int NaluFramingType = params.NaluFramingType;
313        err = NVSetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType);
314        CV_Assert( err == 0 );
315
316        int DisableSPSPPS = params.DisableSPSPPS;
317        err = NVSetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS);
318        CV_Assert( err == 0 );
319    }
320
321    EncoderParams VideoWriterImpl::getEncoderParams() const
322    {
323        int err;
324
325        EncoderParams params;
326
327        int P_Interval;
328        err = NVGetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval);
329        CV_Assert( err == 0 );
330        params.P_Interval = P_Interval;
331
332        int IDR_Period;
333        err = NVGetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period);
334        CV_Assert( err == 0 );
335        params.IDR_Period = IDR_Period;
336
337        int DynamicGOP;
338        err = NVGetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP);
339        CV_Assert( err == 0 );
340        params.DynamicGOP = DynamicGOP;
341
342        NVVE_RateCtrlType RCType;
343        err = NVGetParamValue(encoder_, NVVE_RC_TYPE, &RCType);
344        CV_Assert( err == 0 );
345        params.RCType = RCType;
346
347        int AvgBitrate;
348        err = NVGetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate);
349        CV_Assert( err == 0 );
350        params.AvgBitrate = AvgBitrate;
351
352        int PeakBitrate;
353        err = NVGetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate);
354        CV_Assert( err == 0 );
355        params.PeakBitrate = PeakBitrate;
356
357        int QP_Level_Intra;
358        err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra);
359        CV_Assert( err == 0 );
360        params.QP_Level_Intra = QP_Level_Intra;
361
362        int QP_Level_InterP;
363        err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP);
364        CV_Assert( err == 0 );
365        params.QP_Level_InterP = QP_Level_InterP;
366
367        int QP_Level_InterB;
368        err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB);
369        CV_Assert( err == 0 );
370        params.QP_Level_InterB = QP_Level_InterB;
371
372        int DeblockMode;
373        err = NVGetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode);
374        CV_Assert( err == 0 );
375        params.DeblockMode = DeblockMode;
376
377        int ProfileLevel;
378        err = NVGetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel);
379        CV_Assert( err == 0 );
380        params.ProfileLevel = ProfileLevel;
381
382        int ForceIntra;
383        err = NVGetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra);
384        CV_Assert( err == 0 );
385        params.ForceIntra = ForceIntra;
386
387        int ForceIDR;
388        err = NVGetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR);
389        CV_Assert( err == 0 );
390        params.ForceIDR = ForceIDR;
391
392        int ClearStat;
393        err = NVGetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat);
394        CV_Assert( err == 0 );
395        params.ClearStat = ClearStat;
396
397        NVVE_DI_MODE DIMode;
398        err = NVGetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode);
399        CV_Assert( err == 0 );
400        params.DIMode = DIMode;
401
402        params.Presets = -1;
403
404        int DisableCabac;
405        err = NVGetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac);
406        CV_Assert( err == 0 );
407        params.DisableCabac = DisableCabac;
408
409        int NaluFramingType;
410        err = NVGetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType);
411        CV_Assert( err == 0 );
412        params.NaluFramingType = NaluFramingType;
413
414        int DisableSPSPPS;
415        err = NVGetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS);
416        CV_Assert( err == 0 );
417        params.DisableSPSPPS = DisableSPSPPS;
418
419        return params;
420    }
421
422    void VideoWriterImpl::initGpuMemory()
423    {
424        int err;
425
426        // initialize context
427        GpuMat temp(1, 1, CV_8U);
428        temp.release();
429
430        static const int bpp[] =
431        {
432            16, // UYVY, 4:2:2
433            16, // YUY2, 4:2:2
434            12, // YV12, 4:2:0
435            12, // NV12, 4:2:0
436            12, // IYUV, 4:2:0
437        };
438
439        CUcontext cuContext;
440        cuSafeCall( cuCtxGetCurrent(&cuContext) );
441
442        // Allocate the CUDA memory Pitched Surface
443        if (surfaceFormat_ == UYVY || surfaceFormat_ == YUY2)
444            videoFrame_.create(frameSize_.height, (frameSize_.width * bpp[surfaceFormat_]) / 8, CV_8UC1);
445        else
446            videoFrame_.create((frameSize_.height * bpp[surfaceFormat_]) / 8, frameSize_.width, CV_8UC1);
447
448        // Create the Video Context Lock (used for synchronization)
449        cuSafeCall( cuvidCtxLockCreate(&cuCtxLock_, cuContext) );
450
451        // If we are using GPU Device Memory with NVCUVENC, it is necessary to create a
452        // CUDA Context with a Context Lock cuvidCtxLock.  The Context Lock needs to be passed to NVCUVENC
453
454        int iUseDeviceMem = 1;
455        err = NVSetParamValue(encoder_, NVVE_DEVICE_MEMORY_INPUT, &iUseDeviceMem);
456        CV_Assert( err == 0 );
457
458        err = NVSetParamValue(encoder_, NVVE_DEVICE_CTX_LOCK, &cuCtxLock_);
459        CV_Assert( err == 0 );
460    }
461
462    void VideoWriterImpl::initCallBacks()
463    {
464        NVVE_CallbackParams cb;
465        memset(&cb, 0, sizeof(NVVE_CallbackParams));
466
467        cb.pfnacquirebitstream = HandleAcquireBitStream;
468        cb.pfnonbeginframe     = HandleOnBeginFrame;
469        cb.pfnonendframe       = HandleOnEndFrame;
470        cb.pfnreleasebitstream = HandleReleaseBitStream;
471
472        NVRegisterCB(encoder_, cb, this);
473    }
474
475    void VideoWriterImpl::createHWEncoder()
476    {
477        int err;
478
479        // Create the NVIDIA HW resources for Encoding on NVIDIA hardware
480        err = NVCreateHWEncoder(encoder_);
481        CV_Assert( err == 0 );
482    }
483
484    // UYVY/YUY2 are both 4:2:2 formats (16bpc)
485    // Luma, U, V are interleaved, chroma is subsampled (w/2,h)
486    void copyUYVYorYUY2Frame(Size frameSize, const GpuMat& src, GpuMat& dst)
487    {
488        // Source is YUVY/YUY2 4:2:2, the YUV data in a packed and interleaved
489
490        // YUV Copy setup
491        CUDA_MEMCPY2D stCopyYUV422;
492        memset(&stCopyYUV422, 0, sizeof(CUDA_MEMCPY2D));
493
494        stCopyYUV422.srcXInBytes          = 0;
495        stCopyYUV422.srcY                 = 0;
496        stCopyYUV422.srcMemoryType        = CU_MEMORYTYPE_DEVICE;
497        stCopyYUV422.srcHost              = 0;
498        stCopyYUV422.srcDevice            = (CUdeviceptr) src.data;
499        stCopyYUV422.srcArray             = 0;
500        stCopyYUV422.srcPitch             = src.step;
501
502        stCopyYUV422.dstXInBytes          = 0;
503        stCopyYUV422.dstY                 = 0;
504        stCopyYUV422.dstMemoryType        = CU_MEMORYTYPE_DEVICE;
505        stCopyYUV422.dstHost              = 0;
506        stCopyYUV422.dstDevice            = (CUdeviceptr) dst.data;
507        stCopyYUV422.dstArray             = 0;
508        stCopyYUV422.dstPitch             = dst.step;
509
510        stCopyYUV422.WidthInBytes         = frameSize.width * 2;
511        stCopyYUV422.Height               = frameSize.height;
512
513        // DMA Luma/Chroma
514        cuSafeCall( cuMemcpy2D(&stCopyYUV422) );
515    }
516
517    // YV12/IYUV are both 4:2:0 planar formats (12bpc)
518    // Luma, U, V chroma planar (12bpc), chroma is subsampled (w/2,h/2)
519    void copyYV12orIYUVFrame(Size frameSize, const GpuMat& src, GpuMat& dst)
520    {
521        // Source is YV12/IYUV, this native format is converted to NV12 format by the video encoder
522
523        // (1) luma copy setup
524        CUDA_MEMCPY2D stCopyLuma;
525        memset(&stCopyLuma, 0, sizeof(CUDA_MEMCPY2D));
526
527        stCopyLuma.srcXInBytes          = 0;
528        stCopyLuma.srcY                 = 0;
529        stCopyLuma.srcMemoryType        = CU_MEMORYTYPE_DEVICE;
530        stCopyLuma.srcHost              = 0;
531        stCopyLuma.srcDevice            = (CUdeviceptr) src.data;
532        stCopyLuma.srcArray             = 0;
533        stCopyLuma.srcPitch             = src.step;
534
535        stCopyLuma.dstXInBytes          = 0;
536        stCopyLuma.dstY                 = 0;
537        stCopyLuma.dstMemoryType        = CU_MEMORYTYPE_DEVICE;
538        stCopyLuma.dstHost              = 0;
539        stCopyLuma.dstDevice            = (CUdeviceptr) dst.data;
540        stCopyLuma.dstArray             = 0;
541        stCopyLuma.dstPitch             = dst.step;
542
543        stCopyLuma.WidthInBytes         = frameSize.width;
544        stCopyLuma.Height               = frameSize.height;
545
546        // (2) chroma copy setup, U/V can be done together
547        CUDA_MEMCPY2D stCopyChroma;
548        memset(&stCopyChroma, 0, sizeof(CUDA_MEMCPY2D));
549
550        stCopyChroma.srcXInBytes        = 0;
551        stCopyChroma.srcY               = frameSize.height << 1; // U/V chroma offset
552        stCopyChroma.srcMemoryType      = CU_MEMORYTYPE_DEVICE;
553        stCopyChroma.srcHost            = 0;
554        stCopyChroma.srcDevice          = (CUdeviceptr) src.data;
555        stCopyChroma.srcArray           = 0;
556        stCopyChroma.srcPitch           = src.step >> 1; // chroma is subsampled by 2 (but it has U/V are next to each other)
557
558        stCopyChroma.dstXInBytes        = 0;
559        stCopyChroma.dstY               = frameSize.height << 1; // chroma offset (srcY*srcPitch now points to the chroma planes)
560        stCopyChroma.dstMemoryType      = CU_MEMORYTYPE_DEVICE;
561        stCopyChroma.dstHost            = 0;
562        stCopyChroma.dstDevice          = (CUdeviceptr) dst.data;
563        stCopyChroma.dstArray           = 0;
564        stCopyChroma.dstPitch           = dst.step >> 1;
565
566        stCopyChroma.WidthInBytes       = frameSize.width >> 1;
567        stCopyChroma.Height             = frameSize.height; // U/V are sent together
568
569        // DMA Luma
570        cuSafeCall( cuMemcpy2D(&stCopyLuma) );
571
572        // DMA Chroma channels (UV side by side)
573        cuSafeCall( cuMemcpy2D(&stCopyChroma) );
574    }
575
576    // NV12 is 4:2:0 format (12bpc)
577    // Luma followed by U/V chroma interleaved (12bpc), chroma is subsampled (w/2,h/2)
578    void copyNV12Frame(Size frameSize, const GpuMat& src, GpuMat& dst)
579    {
580        // Source is NV12 in pitch linear memory
581        // Because we are assume input is NV12 (if we take input in the native format), the encoder handles NV12 as a native format in pitch linear memory
582
583        // Luma/Chroma can be done in a single transfer
584        CUDA_MEMCPY2D stCopyNV12;
585        memset(&stCopyNV12, 0, sizeof(CUDA_MEMCPY2D));
586
587        stCopyNV12.srcXInBytes          = 0;
588        stCopyNV12.srcY                 = 0;
589        stCopyNV12.srcMemoryType        = CU_MEMORYTYPE_DEVICE;
590        stCopyNV12.srcHost              = 0;
591        stCopyNV12.srcDevice            = (CUdeviceptr) src.data;
592        stCopyNV12.srcArray             = 0;
593        stCopyNV12.srcPitch             = src.step;
594
595        stCopyNV12.dstXInBytes          = 0;
596        stCopyNV12.dstY                 = 0;
597        stCopyNV12.dstMemoryType        = CU_MEMORYTYPE_DEVICE;
598        stCopyNV12.dstHost              = 0;
599        stCopyNV12.dstDevice            = (CUdeviceptr) dst.data;
600        stCopyNV12.dstArray             = 0;
601        stCopyNV12.dstPitch             = dst.step;
602
603        stCopyNV12.WidthInBytes         = frameSize.width;
604        stCopyNV12.Height               = (frameSize.height * 3) >> 1;
605
606        // DMA Luma/Chroma
607        cuSafeCall( cuMemcpy2D(&stCopyNV12) );
608    }
609
610    void VideoWriterImpl::write(InputArray _frame, bool lastFrame)
611    {
612        GpuMat frame = _frame.getGpuMat();
613
614        if (inputFormat_ == SF_BGR)
615        {
616            CV_Assert( frame.size() == frameSize_ );
617            CV_Assert( frame.type() == CV_8UC1 || frame.type() == CV_8UC3 || frame.type() == CV_8UC4 );
618        }
619        else
620        {
621            CV_Assert( frame.size() == videoFrame_.size() );
622            CV_Assert( frame.type() == videoFrame_.type() );
623        }
624
625        NVVE_EncodeFrameParams efparams;
626        efparams.Width = frameSize_.width;
627        efparams.Height = frameSize_.height;
628        efparams.Pitch = static_cast<int>(videoFrame_.step);
629        efparams.SurfFmt = surfaceFormat_;
630        efparams.PictureStruc = FRAME_PICTURE;
631        efparams.topfieldfirst =  0;
632        efparams.repeatFirstField = 0;
633        efparams.progressiveFrame = (surfaceFormat_ == NV12) ? 1 : 0;
634        efparams.bLast = lastFrame;
635        efparams.picBuf = 0; // Must be set to NULL in order to support device memory input
636
637        // Don't forget we need to lock/unlock between memcopies
638        cuSafeCall( cuvidCtxLock(cuCtxLock_, 0) );
639
640        if (inputFormat_ == SF_BGR)
641        {
642            RGB_to_YV12(frame, videoFrame_);
643        }
644        else
645        {
646            switch (surfaceFormat_)
647            {
648            case UYVY: // UYVY (4:2:2)
649            case YUY2: // YUY2 (4:2:2)
650                copyUYVYorYUY2Frame(frameSize_, frame, videoFrame_);
651                break;
652
653            case YV12: // YV12 (4:2:0), Y V U
654            case IYUV: // IYUV (4:2:0), Y U V
655                copyYV12orIYUVFrame(frameSize_, frame, videoFrame_);
656                break;
657
658            case NV12: // NV12 (4:2:0)
659                copyNV12Frame(frameSize_, frame, videoFrame_);
660                break;
661            }
662        }
663
664        cuSafeCall( cuvidCtxUnlock(cuCtxLock_, 0) );
665
666        int err = NVEncodeFrame(encoder_, &efparams, 0, videoFrame_.data);
667        CV_Assert( err == 0 );
668    }
669
670    unsigned char* NVENCAPI VideoWriterImpl::HandleAcquireBitStream(int* pBufferSize, void* pUserdata)
671    {
672        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);
673
674        return thiz->callback_->acquireBitStream(pBufferSize);
675    }
676
677    void NVENCAPI VideoWriterImpl::HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata)
678    {
679        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);
680
681        thiz->callback_->releaseBitStream(cb, nBytesInBuffer);
682    }
683
684    void NVENCAPI VideoWriterImpl::HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata)
685    {
686        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);
687
688        thiz->callback_->onBeginFrame(pbfi->nFrameNumber, static_cast<EncoderCallBack::PicType>(pbfi->nPicType));
689    }
690
691    void NVENCAPI VideoWriterImpl::HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata)
692    {
693        VideoWriterImpl* thiz = static_cast<VideoWriterImpl*>(pUserdata);
694
695        thiz->callback_->onEndFrame(pefi->nFrameNumber, static_cast<EncoderCallBack::PicType>(pefi->nPicType));
696    }
697
698    ///////////////////////////////////////////////////////////////////////////
699    // FFMPEG
700
701    class EncoderCallBackFFMPEG : public EncoderCallBack
702    {
703    public:
704        EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps);
705        ~EncoderCallBackFFMPEG();
706
707        unsigned char* acquireBitStream(int* bufferSize);
708        void releaseBitStream(unsigned char* data, int size);
709        void onBeginFrame(int frameNumber, PicType picType);
710        void onEndFrame(int frameNumber, PicType picType);
711
712    private:
713        static bool init_MediaStream_FFMPEG();
714
715        struct OutputMediaStream_FFMPEG* stream_;
716        std::vector<uchar> buf_;
717        bool isKeyFrame_;
718
719        static Create_OutputMediaStream_FFMPEG_Plugin create_OutputMediaStream_FFMPEG_p;
720        static Release_OutputMediaStream_FFMPEG_Plugin release_OutputMediaStream_FFMPEG_p;
721        static Write_OutputMediaStream_FFMPEG_Plugin write_OutputMediaStream_FFMPEG_p;
722    };
723
724    Create_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::create_OutputMediaStream_FFMPEG_p = 0;
725    Release_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::release_OutputMediaStream_FFMPEG_p = 0;
726    Write_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::write_OutputMediaStream_FFMPEG_p = 0;
727
728    bool EncoderCallBackFFMPEG::init_MediaStream_FFMPEG()
729    {
730        static bool initialized = false;
731
732        if (!initialized)
733        {
734            #if defined(WIN32) || defined(_WIN32)
735                const char* module_name = "opencv_ffmpeg"
736                    CVAUX_STR(CV_VERSION_MAJOR) CVAUX_STR(CV_VERSION_MINOR) CVAUX_STR(CV_VERSION_REVISION)
737                #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__)
738                    "_64"
739                #endif
740                    ".dll";
741
742                static HMODULE cvFFOpenCV = LoadLibrary(module_name);
743
744                if (cvFFOpenCV)
745                {
746                    create_OutputMediaStream_FFMPEG_p =
747                        (Create_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "create_OutputMediaStream_FFMPEG");
748                    release_OutputMediaStream_FFMPEG_p =
749                        (Release_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "release_OutputMediaStream_FFMPEG");
750                    write_OutputMediaStream_FFMPEG_p =
751                        (Write_OutputMediaStream_FFMPEG_Plugin)GetProcAddress(cvFFOpenCV, "write_OutputMediaStream_FFMPEG");
752
753                    initialized = create_OutputMediaStream_FFMPEG_p != 0 && release_OutputMediaStream_FFMPEG_p != 0 && write_OutputMediaStream_FFMPEG_p != 0;
754                }
755            #elif defined(HAVE_FFMPEG)
756                create_OutputMediaStream_FFMPEG_p = create_OutputMediaStream_FFMPEG;
757                release_OutputMediaStream_FFMPEG_p = release_OutputMediaStream_FFMPEG;
758                write_OutputMediaStream_FFMPEG_p = write_OutputMediaStream_FFMPEG;
759
760                initialized = true;
761            #endif
762        }
763
764        return initialized;
765    }
766
767    EncoderCallBackFFMPEG::EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps) :
768        stream_(0), isKeyFrame_(false)
769    {
770        int buf_size = std::max(frameSize.area() * 4, 1024 * 1024);
771        buf_.resize(buf_size);
772
773        CV_Assert( init_MediaStream_FFMPEG() );
774
775        stream_ = create_OutputMediaStream_FFMPEG_p(fileName.c_str(), frameSize.width, frameSize.height, fps);
776        CV_Assert( stream_ != 0 );
777    }
778
779    EncoderCallBackFFMPEG::~EncoderCallBackFFMPEG()
780    {
781        release_OutputMediaStream_FFMPEG_p(stream_);
782    }
783
784    unsigned char* EncoderCallBackFFMPEG::acquireBitStream(int* bufferSize)
785    {
786        *bufferSize = static_cast<int>(buf_.size());
787        return &buf_[0];
788    }
789
790    void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size)
791    {
792        write_OutputMediaStream_FFMPEG_p(stream_, data, size, isKeyFrame_);
793    }
794
795    void EncoderCallBackFFMPEG::onBeginFrame(int frameNumber, PicType picType)
796    {
797        (void) frameNumber;
798        isKeyFrame_ = (picType == IFRAME);
799    }
800
801    void EncoderCallBackFFMPEG::onEndFrame(int frameNumber, PicType picType)
802    {
803        (void) frameNumber;
804        (void) picType;
805    }
806}
807
808///////////////////////////////////////////////////////////////////////////
809// EncoderParams
810
811cv::cudacodec::EncoderParams::EncoderParams()
812{
813    P_Interval = 3;
814    IDR_Period = 15;
815    DynamicGOP = 0;
816    RCType = 1;
817    AvgBitrate = 4000000;
818    PeakBitrate = 10000000;
819    QP_Level_Intra = 25;
820    QP_Level_InterP = 28;
821    QP_Level_InterB = 31;
822    DeblockMode = 1;
823    ProfileLevel = 65357;
824    ForceIntra = 0;
825    ForceIDR = 0;
826    ClearStat = 0;
827    DIMode = 1;
828    Presets = 2;
829    DisableCabac = 0;
830    NaluFramingType = 0;
831    DisableSPSPPS = 0;
832}
833
834cv::cudacodec::EncoderParams::EncoderParams(const String& configFile)
835{
836    load(configFile);
837}
838
839void cv::cudacodec::EncoderParams::load(const String& configFile)
840{
841    FileStorage fs(configFile, FileStorage::READ);
842    CV_Assert( fs.isOpened() );
843
844    read(fs["P_Interval"     ], P_Interval, 3);
845    read(fs["IDR_Period"     ], IDR_Period, 15);
846    read(fs["DynamicGOP"     ], DynamicGOP, 0);
847    read(fs["RCType"         ], RCType, 1);
848    read(fs["AvgBitrate"     ], AvgBitrate, 4000000);
849    read(fs["PeakBitrate"    ], PeakBitrate, 10000000);
850    read(fs["QP_Level_Intra" ], QP_Level_Intra, 25);
851    read(fs["QP_Level_InterP"], QP_Level_InterP, 28);
852    read(fs["QP_Level_InterB"], QP_Level_InterB, 31);
853    read(fs["DeblockMode"    ], DeblockMode, 1);
854    read(fs["ProfileLevel"   ], ProfileLevel, 65357);
855    read(fs["ForceIntra"     ], ForceIntra, 0);
856    read(fs["ForceIDR"       ], ForceIDR, 0);
857    read(fs["ClearStat"      ], ClearStat, 0);
858    read(fs["DIMode"         ], DIMode, 1);
859    read(fs["Presets"        ], Presets, 2);
860    read(fs["DisableCabac"   ], DisableCabac, 0);
861    read(fs["NaluFramingType"], NaluFramingType, 0);
862    read(fs["DisableSPSPPS"  ], DisableSPSPPS, 0);
863}
864
865void cv::cudacodec::EncoderParams::save(const String& configFile) const
866{
867    FileStorage fs(configFile, FileStorage::WRITE);
868    CV_Assert( fs.isOpened() );
869
870    write(fs, "P_Interval"     , P_Interval);
871    write(fs, "IDR_Period"     , IDR_Period);
872    write(fs, "DynamicGOP"     , DynamicGOP);
873    write(fs, "RCType"         , RCType);
874    write(fs, "AvgBitrate"     , AvgBitrate);
875    write(fs, "PeakBitrate"    , PeakBitrate);
876    write(fs, "QP_Level_Intra" , QP_Level_Intra);
877    write(fs, "QP_Level_InterP", QP_Level_InterP);
878    write(fs, "QP_Level_InterB", QP_Level_InterB);
879    write(fs, "DeblockMode"    , DeblockMode);
880    write(fs, "ProfileLevel"   , ProfileLevel);
881    write(fs, "ForceIntra"     , ForceIntra);
882    write(fs, "ForceIDR"       , ForceIDR);
883    write(fs, "ClearStat"      , ClearStat);
884    write(fs, "DIMode"         , DIMode);
885    write(fs, "Presets"        , Presets);
886    write(fs, "DisableCabac"   , DisableCabac);
887    write(fs, "NaluFramingType", NaluFramingType);
888    write(fs, "DisableSPSPPS"  , DisableSPSPPS);
889}
890
891///////////////////////////////////////////////////////////////////////////
892// createVideoWriter
893
894Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format)
895{
896    Ptr<EncoderCallBack> encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps));
897    return createVideoWriter(encoderCallback, frameSize, fps, format);
898}
899
900Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format)
901{
902    Ptr<EncoderCallBack> encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps));
903    return createVideoWriter(encoderCallback, frameSize, fps, params, format);
904}
905
906Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, SurfaceFormat format)
907{
908    return makePtr<VideoWriterImpl>(encoderCallback, frameSize, fps, format);
909}
910
911Ptr<VideoWriter> cv::cudacodec::createVideoWriter(const Ptr<EncoderCallBack>& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format)
912{
913    return makePtr<VideoWriterImpl>(encoderCallback, frameSize, fps, params, format);
914}
915
916#endif // !defined HAVE_CUDA || !defined WIN32
917