1/*M///////////////////////////////////////////////////////////////////////////////////////
2//
3//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4//
5//  By downloading, copying, installing or using the software you agree to this license.
6//  If you do not agree to this license, do not download, install,
7//  copy or use the software.
8//
9//
10//                           License Agreement
11//                For Open Source Computer Vision Library
12//
13// Copyright (C) 2015, OpenCV Foundation, all rights reserved.
14// Third party copyrights are property of their respective owners.
15//
16// Redistribution and use in source and binary forms, with or without modification,
17// are permitted provided that the following conditions are met:
18//
19//   * Redistribution's of source code must retain the above copyright notice,
20//     this list of conditions and the following disclaimer.
21//
22//   * Redistribution's in binary form must reproduce the above copyright notice,
23//     this list of conditions and the following disclaimer in the documentation
24//     and/or other materials provided with the distribution.
25//
26//   * The name of Intel Corporation may not be used to endorse or promote products
27//     derived from this software without specific prior written permission.
28//
29// This software is provided by the copyright holders and contributors "as is" and
30// any express or implied warranties, including, but not limited to, the implied
31// warranties of merchantability and fitness for a particular purpose are disclaimed.
32// In no event shall the Intel Corporation or contributors be liable for any direct,
33// indirect, incidental, special, exemplary, or consequential damages
34// (including, but not limited to, procurement of substitute goods or services;
35// loss of use, data, or profits; or business interruption) however caused
36// and on any theory of liability, whether in contract, strict liability,
37// or tort (including negligence or otherwise) arising in any way out of
38// the use of this software, even if advised of the possibility of such damage.
39//
40//M*/
41
42#include "precomp.hpp"
43#include <vector>
44
45#if CV_NEON
46#define WITH_NEON
47#endif
48
49namespace cv
50{
51namespace mjpeg
52{
53
// Pixel layouts of the frames handed to the encoder. MotionJpegWriter::write()
// picks one of these and forwards it to writeFrameData().
enum { COLORSPACE_GRAY=0, COLORSPACE_RGBA=1, COLORSPACE_BGR=2, COLORSPACE_YUV444P=3 };

// Packs four characters into a 32-bit tag with `a` in the least significant
// byte, so that emitting the value least-significant byte first (as
// BitStream::putInt does) produces the bytes in the order a, b, c, d.
// Every argument is reduced to one byte and widened to unsigned BEFORE it is
// shifted: shifting the int-promoted byte left by 24 would push a set top bit
// (any byte >= 0x80) into the sign bit of a signed int.
#define fourCC(a,b,c,d) \
    ((int)(((unsigned)(unsigned char)(d) << 24) | \
           ((unsigned)(unsigned char)(c) << 16) | \
           ((unsigned)(unsigned char)(b) << 8)  | \
            (unsigned)(unsigned char)(a)))

// Byte size written after both the 'avih' and the 'strh' tags (14 ints, resp.
// 12 ints + 4 shorts, follow each of them).
static const int AVIH_STRH_SIZE = 56;
// First field of the 'strf' payload (the BITMAPINFOHEADER size field).
static const int STRF_SIZE = 40;
// Flags word of the main AVI header.
// NOTE(review): 0x910 looks like HASINDEX | ISINTERLEAVED | TRUSTCKTYPE from
// the AVI header flag definitions - confirm against the AVI reference.
static const int AVI_DWFLAG = 0x00000910;
// Written right before the frame rate in the stream header.
static const int AVI_DWSCALE = 1;
// Quality field of the stream header.
static const int AVI_DWQUALITY = -1;
// The JUNK chunk is zero-filled until the stream position reaches this offset.
static const int JUNK_SEEK = 4096;
// Flags value stored with every entry of the 'idx1' index.
static const int AVIIF_KEYFRAME = 0x10;
// Value written into the main header right after the microseconds-per-frame field.
static const int MAX_BYTES_PER_SEC = 99999999;
// Suggested buffer size written into both the main and the stream header.
static const int SUG_BUFFER_SIZE = 1048576;
67
// bit_mask[n] has exactly the n least significant bits set, for n = 0..32.
static const unsigned bit_mask[] =
{
    0,
    (1u <<  1) - 1u, (1u <<  2) - 1u, (1u <<  3) - 1u, (1u <<  4) - 1u,
    (1u <<  5) - 1u, (1u <<  6) - 1u, (1u <<  7) - 1u, (1u <<  8) - 1u,
    (1u <<  9) - 1u, (1u << 10) - 1u, (1u << 11) - 1u, (1u << 12) - 1u,
    (1u << 13) - 1u, (1u << 14) - 1u, (1u << 15) - 1u, (1u << 16) - 1u,
    (1u << 17) - 1u, (1u << 18) - 1u, (1u << 19) - 1u, (1u << 20) - 1u,
    (1u << 21) - 1u, (1u << 22) - 1u, (1u << 23) - 1u, (1u << 24) - 1u,
    (1u << 25) - 1u, (1u << 26) - 1u, (1u << 27) - 1u, (1u << 28) - 1u,
    (1u << 29) - 1u, (1u << 30) - 1u, (1u << 31) - 1u,
    // All 32 bits: spelled out because a shift by the full width is not allowed.
    0xFFFFFFFFu
};
80
81class BitStream
82{
83public:
84    enum
85    {
86        DEFAULT_BLOCK_SIZE = (1 << 15),
87        huff_val_shift = 20,
88        huff_code_mask = (1 << huff_val_shift) - 1
89    };
90
91    BitStream()
92    {
93        m_buf.resize(DEFAULT_BLOCK_SIZE + 1024);
94        m_start = &m_buf[0];
95        m_end = m_start + DEFAULT_BLOCK_SIZE;
96        m_is_opened = false;
97        m_f = 0;
98    }
99
100    ~BitStream()
101    {
102        close();
103    }
104
105    bool open(const String& filename)
106    {
107        close();
108        m_f = fopen(filename.c_str(), "wb");
109        if( !m_f )
110            return false;
111        m_current = m_start;
112        m_pos = 0;
113        return true;
114    }
115
116    bool isOpened() const { return m_f != 0; }
117
118    void close()
119    {
120        writeBlock();
121        if( m_f )
122            fclose(m_f);
123        m_f = 0;
124    }
125
126    void writeBlock()
127    {
128        size_t wsz0 = m_current - m_start;
129        if( wsz0 > 0 && m_f )
130        {
131            size_t wsz = fwrite(m_start, 1, wsz0, m_f);
132            CV_Assert( wsz == wsz0 );
133        }
134        m_pos += wsz0;
135        m_current = m_start;
136    }
137
138    size_t getPos() const
139    {
140        return (size_t)(m_current - m_start) + m_pos;
141    }
142
143    void putByte(int val)
144    {
145        *m_current++ = (uchar)val;
146        if( m_current >= m_end )
147            writeBlock();
148    }
149
150    void putBytes(const uchar* buf, int count)
151    {
152        uchar* data = (uchar*)buf;
153        CV_Assert(m_f && data && m_current && count >= 0);
154        if( m_current >= m_end )
155            writeBlock();
156
157        while( count )
158        {
159            int l = (int)(m_end - m_current);
160
161            if (l > count)
162                l = count;
163
164            if( l > 0 )
165            {
166                memcpy(m_current, data, l);
167                m_current += l;
168                data += l;
169                count -= l;
170            }
171            if( m_current >= m_end )
172                writeBlock();
173        }
174    }
175
176    void putShort(int val)
177    {
178        m_current[0] = (uchar)val;
179        m_current[1] = (uchar)(val >> 8);
180        m_current += 2;
181        if( m_current >= m_end )
182            writeBlock();
183    }
184
185    void putInt(int val)
186    {
187        m_current[0] = (uchar)val;
188        m_current[1] = (uchar)(val >> 8);
189        m_current[2] = (uchar)(val >> 16);
190        m_current[3] = (uchar)(val >> 24);
191        m_current += 4;
192        if( m_current >= m_end )
193            writeBlock();
194    }
195
196    void jputShort(int val)
197    {
198        m_current[0] = (uchar)(val >> 8);
199        m_current[1] = (uchar)val;
200        m_current += 2;
201        if( m_current >= m_end )
202            writeBlock();
203    }
204
205    void patchInt(int val, size_t pos)
206    {
207        if( pos >= m_pos )
208        {
209            ptrdiff_t delta = pos - m_pos;
210            CV_Assert( delta < m_current - m_start );
211            m_start[delta] = (uchar)val;
212            m_start[delta+1] = (uchar)(val >> 8);
213            m_start[delta+2] = (uchar)(val >> 16);
214            m_start[delta+3] = (uchar)(val >> 24);
215        }
216        else
217        {
218            long fpos = ftell(m_f);
219            fseek(m_f, (long)pos, SEEK_SET);
220            uchar buf[] = { (uchar)val, (uchar)(val >> 8), (uchar)(val >> 16), (uchar)(val >> 24) };
221            fwrite(buf, 1, 4, m_f);
222            fseek(m_f, fpos, SEEK_SET);
223        }
224    }
225
226    void jput(unsigned currval)
227    {
228        uchar v;
229        uchar* ptr = m_current;
230        v = (uchar)(currval >> 24);
231        *ptr++ = v;
232        if( v == 255 )
233            *ptr++ = 0;
234        v = (uchar)(currval >> 16);
235        *ptr++ = v;
236        if( v == 255 )
237            *ptr++ = 0;
238        v = (uchar)(currval >> 8);
239        *ptr++ = v;
240        if( v == 255 )
241            *ptr++ = 0;
242        v = (uchar)currval;
243        *ptr++ = v;
244        if( v == 255 )
245            *ptr++ = 0;
246        m_current = ptr;
247        if( m_current >= m_end )
248            writeBlock();
249    }
250
251    void jflush(unsigned currval, int bitIdx)
252    {
253        uchar v;
254        uchar* ptr = m_current;
255        currval |= (1 << bitIdx)-1;
256        while( bitIdx < 32 )
257        {
258            v = (uchar)(currval >> 24);
259            *ptr++ = v;
260            if( v == 255 )
261                *ptr++ = 0;
262            currval <<= 8;
263            bitIdx += 8;
264        }
265        m_current = ptr;
266        if( m_current >= m_end )
267            writeBlock();
268    }
269
270    static bool createEncodeHuffmanTable( const int* src, unsigned* table, int max_size )
271    {
272        int  i, k;
273        int  min_val = INT_MAX, max_val = INT_MIN;
274        int  size;
275
276        /* calc min and max values in the table */
277        for( i = 1, k = 1; src[k] >= 0; i++ )
278        {
279            int code_count = src[k++];
280
281            for( code_count += k; k < code_count; k++ )
282            {
283                int  val = src[k] >> huff_val_shift;
284                if( val < min_val )
285                    min_val = val;
286                if( val > max_val )
287                    max_val = val;
288            }
289        }
290
291        size = max_val - min_val + 3;
292
293        if( size > max_size )
294        {
295            CV_Error(CV_StsOutOfRange, "too big maximum Huffman code size");
296            return false;
297        }
298
299        memset( table, 0, size*sizeof(table[0]));
300
301        table[0] = min_val;
302        table[1] = size - 2;
303
304        for( i = 1, k = 1; src[k] >= 0; i++ )
305        {
306            int code_count = src[k++];
307
308            for( code_count += k; k < code_count; k++ )
309            {
310                int  val = src[k] >> huff_val_shift;
311                int  code = src[k] & huff_code_mask;
312
313                table[val - min_val + 2] = (code << 8) | i;
314            }
315        }
316        return true;
317    }
318
319    static int* createSourceHuffmanTable(const uchar* src, int* dst,
320                                         int max_bits, int first_bits)
321    {
322        int   i, val_idx, code = 0;
323        int*  table = dst;
324        *dst++ = first_bits;
325        for (i = 1, val_idx = max_bits; i <= max_bits; i++)
326        {
327            int code_count = src[i - 1];
328            dst[0] = code_count;
329            code <<= 1;
330            for (int k = 0; k < code_count; k++)
331            {
332                dst[k + 1] = (src[val_idx + k] << huff_val_shift) | (code + k);
333            }
334            code += code_count;
335            dst += code_count + 1;
336            val_idx += code_count;
337        }
338        dst[0] = -1;
339        return  table;
340    }
341
342protected:
343    std::vector<uchar> m_buf;
344    uchar*  m_start;
345    uchar*  m_end;
346    uchar*  m_current;
347    size_t  m_pos;
348    bool    m_is_opened;
349    FILE*   m_f;
350};
351
352
353class MotionJpegWriter : public IVideoWriter
354{
355public:
356    MotionJpegWriter() { rawstream = false; }
357    MotionJpegWriter(const String& filename, double fps, Size size, bool iscolor)
358    {
359        rawstream = false;
360        open(filename, fps, size, iscolor);
361    }
362    ~MotionJpegWriter() { close(); }
363
364    void close()
365    {
366        if( !strm.isOpened() )
367            return;
368
369        if( !frameOffset.empty() && !rawstream )
370        {
371            endWriteChunk(); // end LIST 'movi'
372            writeIndex();
373            finishWriteAVI();
374        }
375        strm.close();
376        frameOffset.clear();
377        frameSize.clear();
378        AVIChunkSizeIndex.clear();
379        frameNumIndexes.clear();
380    }
381
382    bool open(const String& filename, double fps, Size size, bool iscolor)
383    {
384        close();
385
386        if( filename.empty() )
387            return false;
388        const char* ext = strrchr(filename.c_str(), '.');
389        if( !ext )
390            return false;
391        if( strcmp(ext, ".avi") != 0 && strcmp(ext, ".AVI") != 0 && strcmp(ext, ".Avi") != 0 )
392            return false;
393
394        bool ok = strm.open(filename);
395        if( !ok )
396            return false;
397
398        CV_Assert(fps >= 1);
399        outfps = cvRound(fps);
400        width = size.width;
401        height = size.height;
402        quality = 75;
403        rawstream = false;
404        channels = iscolor ? 3 : 1;
405
406        if( !rawstream )
407        {
408            startWriteAVI();
409            writeStreamHeader();
410        }
411        //printf("motion jpeg stream %s has been successfully opened\n", filename.c_str());
412        return true;
413    }
414
415    bool isOpened() const { return strm.isOpened(); }
416
417    void startWriteAVI()
418    {
419        startWriteChunk(fourCC('R', 'I', 'F', 'F'));
420
421        strm.putInt(fourCC('A', 'V', 'I', ' '));
422
423        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
424
425        strm.putInt(fourCC('h', 'd', 'r', 'l'));
426        strm.putInt(fourCC('a', 'v', 'i', 'h'));
427        strm.putInt(AVIH_STRH_SIZE);
428        strm.putInt(cvRound(1e6 / outfps));
429        strm.putInt(MAX_BYTES_PER_SEC);
430        strm.putInt(0);
431        strm.putInt(AVI_DWFLAG);
432
433        frameNumIndexes.push_back(strm.getPos());
434
435        strm.putInt(0);
436        strm.putInt(0);
437        strm.putInt(1); // number of streams
438        strm.putInt(SUG_BUFFER_SIZE);
439        strm.putInt(width);
440        strm.putInt(height);
441        strm.putInt(0);
442        strm.putInt(0);
443        strm.putInt(0);
444        strm.putInt(0);
445    }
446
447    void writeStreamHeader()
448    {
449        // strh
450        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
451
452        strm.putInt(fourCC('s', 't', 'r', 'l'));
453        strm.putInt(fourCC('s', 't', 'r', 'h'));
454        strm.putInt(AVIH_STRH_SIZE);
455        strm.putInt(fourCC('v', 'i', 'd', 's'));
456        strm.putInt(fourCC('M', 'J', 'P', 'G'));
457        strm.putInt(0);
458        strm.putInt(0);
459        strm.putInt(0);
460        strm.putInt(AVI_DWSCALE);
461        strm.putInt(outfps);
462        strm.putInt(0);
463
464        frameNumIndexes.push_back(strm.getPos());
465
466        strm.putInt(0);
467        strm.putInt(SUG_BUFFER_SIZE);
468        strm.putInt(AVI_DWQUALITY);
469        strm.putInt(0);
470        strm.putShort(0);
471        strm.putShort(0);
472        strm.putShort(width);
473        strm.putShort(height);
474
475        // strf (use the BITMAPINFOHEADER for video)
476        startWriteChunk(fourCC('s', 't', 'r', 'f'));
477
478        strm.putInt(STRF_SIZE);
479        strm.putInt(width);
480        strm.putInt(height);
481        strm.putShort(1); // planes (1 means interleaved data (after decompression))
482
483        strm.putShort(channels); // bits per pixel
484        strm.putInt(fourCC('M', 'J', 'P', 'G'));
485        strm.putInt(width * height * channels);
486        strm.putInt(0);
487        strm.putInt(0);
488        strm.putInt(0);
489        strm.putInt(0);
490        // Must be indx chunk
491        endWriteChunk(); // end strf
492        endWriteChunk(); // end strl
493
494        // odml
495        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
496        strm.putInt(fourCC('o', 'd', 'm', 'l'));
497        startWriteChunk(fourCC('d', 'm', 'l', 'h'));
498
499        frameNumIndexes.push_back(strm.getPos());
500
501        strm.putInt(0);
502        strm.putInt(0);
503
504        endWriteChunk(); // end dmlh
505        endWriteChunk(); // end odml
506
507        endWriteChunk(); // end hdrl
508
509        // JUNK
510        startWriteChunk(fourCC('J', 'U', 'N', 'K'));
511        size_t pos = strm.getPos();
512        for( ; pos < (size_t)JUNK_SEEK; pos += 4 )
513            strm.putInt(0);
514        endWriteChunk(); // end JUNK
515        // movi
516        startWriteChunk(fourCC('L', 'I', 'S', 'T'));
517        moviPointer = strm.getPos();
518        strm.putInt(fourCC('m', 'o', 'v', 'i'));
519    }
520
521    void startWriteChunk(int fourcc)
522    {
523        CV_Assert(fourcc != 0);
524        strm.putInt(fourcc);
525
526        AVIChunkSizeIndex.push_back(strm.getPos());
527        strm.putInt(0);
528    }
529
530    void endWriteChunk()
531    {
532        if( !AVIChunkSizeIndex.empty() )
533        {
534            size_t currpos = strm.getPos();
535            size_t pospos = AVIChunkSizeIndex.back();
536            AVIChunkSizeIndex.pop_back();
537            int chunksz = (int)(currpos - (pospos + 4));
538            strm.patchInt(chunksz, pospos);
539        }
540    }
541
542    void writeIndex()
543    {
544        // old style AVI index. Must be Open-DML index
545        startWriteChunk(fourCC('i', 'd', 'x', '1'));
546        int nframes = (int)frameOffset.size();
547        for( int i = 0; i < nframes; i++ )
548        {
549            strm.putInt(fourCC('0', '0', 'd', 'c'));
550            strm.putInt(AVIIF_KEYFRAME);
551            strm.putInt((int)frameOffset[i]);
552            strm.putInt((int)frameSize[i]);
553        }
554        endWriteChunk(); // End idx1
555    }
556
557    void finishWriteAVI()
558    {
559        int nframes = (int)frameOffset.size();
560        // Record frames numbers to AVI Header
561        while (!frameNumIndexes.empty())
562        {
563            size_t ppos = frameNumIndexes.back();
564            frameNumIndexes.pop_back();
565            strm.patchInt(nframes, ppos);
566        }
567        endWriteChunk(); // end RIFF
568    }
569
570    void write(InputArray _img)
571    {
572        Mat img = _img.getMat();
573        size_t chunkPointer = strm.getPos();
574        int input_channels = img.channels();
575        int colorspace = -1;
576
577        if( input_channels == 1 && channels == 1 )
578        {
579            CV_Assert( img.cols == width && img.rows == height );
580            colorspace = COLORSPACE_GRAY;
581        }
582        else if( input_channels == 4 )
583        {
584            CV_Assert( img.cols == width && img.rows == height && channels == 3 );
585            colorspace = COLORSPACE_RGBA;
586        }
587        else if( input_channels == 3 )
588        {
589            CV_Assert( img.cols == width && img.rows == height && channels == 3 );
590            colorspace = COLORSPACE_BGR;
591        }
592        else if( input_channels == 1 && channels == 3 )
593        {
594            CV_Assert( img.cols == width && img.rows == height*3 );
595            colorspace = COLORSPACE_YUV444P;
596        }
597        else
598            CV_Error(CV_StsBadArg, "Invalid combination of specified video colorspace and the input image colorspace");
599
600        if( !rawstream )
601            startWriteChunk(fourCC('0', '0', 'd', 'c'));
602
603        writeFrameData(img.data, (int)img.step, colorspace, input_channels);
604
605        if( !rawstream )
606        {
607            frameOffset.push_back(chunkPointer - moviPointer);
608            frameSize.push_back(strm.getPos() - chunkPointer - 8);       // Size excludes '00dc' and size field
609            endWriteChunk(); // end '00dc'
610        }
611    }
612
613    double getProperty(int propId) const
614    {
615        if( propId == VIDEOWRITER_PROP_QUALITY )
616            return quality;
617        if( propId == VIDEOWRITER_PROP_FRAMEBYTES )
618            return frameSize.empty() ? 0. : (double)frameSize.back();
619        return 0.;
620    }
621
622    bool setProperty(int propId, double value)
623    {
624        if( propId == VIDEOWRITER_PROP_QUALITY )
625        {
626            quality = value;
627            return true;
628        }
629        return false;
630    }
631
632    void writeFrameData( const uchar* data, int step, int colorspace, int input_channels );
633
634protected:
635    int outfps;
636    int width, height, channels;
637    double quality;
638    size_t moviPointer;
639    std::vector<size_t> frameOffset, frameSize, AVIChunkSizeIndex, frameNumIndexes;
640    bool rawstream;
641
642    BitStream strm;
643};
644
// Drops n fractional bits from x after adding half of the last kept unit,
// i.e. (x + 2^(n-1)) >> n.
#define DCT_DESCALE(x, n) (((x) + (((int)1) << ((n) - 1))) >> (n))
// Converts the real constant x to fixed point with n fractional bits
// (adds 0.5, then truncates). The expansion is fully parenthesized and carries
// no trailing semicolon, so it can be used inside larger expressions.
#define fix(x, n)   ((int)((x)*(1 << (n)) + .5))

enum
{
    fixb = 14,      // fractional bits of the DCT constants below
    fixc = 12,      // fractional bits of the colour conversion coefficients
    postshift = 14
};

// DCT multipliers in fixb-bit fixed point.
static const int C0_707 = fix(0.707106781f, fixb);
static const int C0_541 = fix(0.541196100f, fixb);
static const int C0_382 = fix(0.382683432f, fixb);
static const int C1_306 = fix(1.306562965f, fixb);

// RGB -> Y weights in fixc-bit fixed point.
static const int y_r = fix(0.299, fixc);
static const int y_g = fix(0.587, fixc);
static const int y_b = fix(0.114, fixc);

// RGB -> Cb weights.
static const int cb_r = -fix(0.1687, fixc);
static const int cb_g = -fix(0.3313, fixc);
static const int cb_b = fix(0.5, fixc);

// RGB -> Cr weights.
static const int cr_r = fix(0.5, fixc);
static const int cr_g = -fix(0.4187, fixc);
static const int cr_b = -fix(0.0813, fixc);
671
// Standard JPEG quantization tables

// 8x8 table of quantization steps, 64 entries stored row by row.
// NOTE(review): the first row here (16, 12, 14, 14, 18, 24, 49, 72) is the
// first COLUMN of the luminance table K.1 of ITU-T T.81, so this appears to be
// that table transposed (hence the _T suffix) - confirm against the consumer,
// which is not visible in this part of the file.
static const uchar jpegTableK1_T[] =
{
    16, 12, 14, 14,  18,  24,  49,  72,
    11, 12, 13, 17,  22,  35,  64,  92,
    10, 14, 16, 22,  37,  55,  78,  95,
    16, 19, 24, 29,  56,  64,  87,  98,
    24, 26, 40, 51,  68,  81, 103, 112,
    40, 58, 57, 87, 109, 104, 121, 100,
    51, 60, 69, 80, 103, 113, 120, 103,
    61, 55, 56, 62,  77,  92, 101,  99
};
684
// Second 8x8 quantization table, 64 entries stored row by row.
// NOTE(review): presumably the chrominance table K.2 of ITU-T T.81; the values
// are symmetric about the diagonal, so the transposed (_T) and the plain
// layout coincide.
static const uchar jpegTableK2_T[] =
{
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};
696
// Standard Huffman tables
//
// Each table below is a first row of 16 bytes followed by a list of symbol
// values. NOTE(review): this matches the input layout read by
// BitStream::createSourceHuffmanTable() when called with max_bits = 16
// (per-length code counts first, then the values) - the call site is not
// visible here, confirm.

// ... for luma DCs.
// The 16 leading bytes sum to 12, the number of values (0..11) that follow.
static const uchar jpegTableK3[] =
{
    0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
705
// ... for chroma DCs.
// Same layout as the luma DC table: a row of 16 bytes (summing to 12)
// followed by the 12 values 0..11.
static const uchar jpegTableK4[] =
{
    0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
};
712
// ... for luma ACs.
// A row of 16 bytes (summing to 162) followed by 162 symbol bytes.
// NOTE(review): in JPEG AC tables each symbol byte packs a zero-run length in
// the high nibble and a magnitude category in the low nibble - confirm against
// the encoder code that consumes this table.
static const uchar jpegTableK5[] =
{
    0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125,
    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
739
// ... for chroma ACs
// Same layout as the luma AC table: a row of 16 bytes (summing to 162)
// followed by 162 symbol bytes.
static const uchar jpegTableK6[] =
{
    0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119,
    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
    0xf9, 0xfa
};
766
// Scan-order table: entry k is the index (0..63) inside an 8x8 block of the
// k-th coefficient of the scan. The sequence starts 0, 8, 1, ..., i.e. it
// steps down a column first.
// The 64 real entries are followed by 16 extra entries that all map to 63.
// NOTE(review): the padding presumably keeps a scan index that overshoots 63
// inside the table - confirm against the code that indexes it.
static const uchar zigzag[] =
{
    0,  8,  1,  2,  9, 16, 24, 17, 10,  3,  4, 11, 18, 25, 32, 40,
    33, 26, 19, 12,  5,  6, 13, 20, 27, 34, 41, 48, 56, 49, 42, 35,
    28, 21, 14,  7, 15, 22, 29, 36, 43, 50, 57, 58, 51, 44, 37, 30,
    23, 31, 38, 45, 52, 59, 60, 53, 46, 39, 47, 54, 61, 62, 55, 63,
    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
};
775
776
// 8x8 table of integer scale factors, 64 entries stored row by row; the table
// is symmetric and its first entry is 16384 = 1 << 14.
// NOTE(review): presumably the per-coefficient scale factors of the AAN DCT in
// 14-bit fixed point, combined with the quantization tables elsewhere - the
// code using this table is not visible in this part of the file.
static const int idct_prescale[] =
{
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
788
// Fixed byte sequence made of the SOI marker followed by a 16-byte JFIF APP0
// segment. Being a string literal, the array also holds one terminating NUL
// after the bytes listed below.
// NOTE(review): presumably emitted at the start of every encoded frame by
// writeFrameData(), which is not visible in this part of the file.
static const char jpegHeader[] =
"\xFF\xD8"  // SOI  - start of image
"\xFF\xE0"  // APP0 - jfif extension
"\x00\x10"  // 2 bytes: length of APP0 segment
"JFIF\x00"  // JFIF signature
"\x01\x02"  // version of JFIF
"\x00"      // units = pixels ( 1 - inch, 2 - cm )
"\x00\x01\x00\x01" // 2 2-bytes values: x density & y density
"\x00\x00"; // width & height of thumbnail: ( 0x0 means no thumbnail)
798
799#ifdef WITH_NEON
// FDCT with postscaling
//
// NEON version of the 8x8 forward DCT.
//   src       - 8 rows of 8 shorts; consecutive rows are `step` elements apart
//   dst       - receives 64 shorts as 8 rows of 8 (row k is stored at dst + k*8)
//   step      - distance between source rows, in shorts
//   postscale - 64 shorts, read in rows of 8 at the same offsets as dst; every
//               result is multiplied by its factor in STORE_DESCALED
//
// Structure: a 1-D butterfly network is applied to eight registers that hold
// one source row each (lane-wise, so all 8 columns are processed at once),
// the 8x8 result is transposed, and the same network is applied again before
// the scaled results are stored.
//
// Fixed-point note: vqdmulhq_n_s16(a, c) yields the high half of the doubled
// product, (2*a*c) >> 16. With c = K*2 and K holding 14 fractional bits (fixb)
// that equals (a*K) >> 14. C1_306*2 would not fit into a short, so that
// constant is passed as is and the product is shifted left by one afterwards.
static void aan_fdct8x8( const short *src, short *dst,
                        int step, const short *postscale )
{
    // Pass 1: process rows
    // (each register is one source row; rows 0/7 and 3/4 are paired first)
    int16x8_t x0 = vld1q_s16(src);    int16x8_t x1 = vld1q_s16(src + step*7);
    int16x8_t x2 = vld1q_s16(src + step*3);    int16x8_t x3 = vld1q_s16(src + step*4);

    int16x8_t x4 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);

    // t1..t4 keep the four pair differences for the odd-numbered outputs
    int16x8_t t1 = x0; int16x8_t t2 = x2;

    x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);

    x0 = vld1q_s16(src + step);    x3 = vld1q_s16(src + step*6);

    x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);
    int16x8_t t3 = x0;

    x0 = vld1q_s16(src + step*2);    x3 = vld1q_s16(src + step*5);

    int16x8_t t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);
    x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);

    // even-numbered outputs come from the pair sums
    int16x8_t res0 = x1;
    int16x8_t res4 = x2;
    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));
    x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);

    int16x8_t res2 = x4;
    int16x8_t res6 = x1;

    // odd-numbered outputs come from the saved pair differences
    x0 = t2;    x1 = t4;
    x2 = t3;    x3 = t1;
    x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);
    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);
    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);

    int16x8_t res1 = x0;
    int16x8_t res3 = x3;
    int16x8_t res5 = x1;
    int16x8_t res7 = x4;

    //transpose a matrix
    /*
     res0 00 01 02 03 04 05 06 07
     res1 10 11 12 13 14 15 16 17
     res2 20 21 22 23 24 25 26 27
     res3 30 31 32 33 34 35 36 37
     res4 40 41 42 43 44 45 46 47
     res5 50 51 52 53 54 55 56 57
     res6 60 61 62 63 64 65 66 67
     res7 70 71 72 73 74 75 76 77
     */
    // The transpose is done in four 4x4 quadrants: 16-bit lanes of two rows
    // are interleaved with vtrn_s16, then 32-bit pairs with vtrnq_s32.

    //transpose elements 00-33
    int16x4_t res0_0 = vget_low_s16(res0);
    int16x4_t res1_0 = vget_low_s16(res1);
    int16x4x2_t tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res2);
    res1_0 = vget_low_s16(res3);
    tres = vtrn_s16(res0_0, res1_0);
    int32x4_t l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres1 = vtrnq_s32(l0, l1);

    // transpose elements 40-73
    res0_0 = vget_low_s16(res4);
    res1_0 = vget_low_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_low_s16(res6);
    res1_0 = vget_low_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    int32x4x2_t tres2 = vtrnq_s32(l0, l1);

    //combine into 0-3
    int16x8_t transp_res0 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res1 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res2 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res3 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    // transpose elements 04-37
    res0_0 = vget_high_s16(res0);
    res1_0 = vget_high_s16(res1);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res2);
    res1_0 = vget_high_s16(res3);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    tres1 = vtrnq_s32(l0, l1);

    // transpose elements 44-77
    res0_0 = vget_high_s16(res4);
    res1_0 = vget_high_s16(res5);
    tres = vtrn_s16(res0_0, res1_0);
    l0 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    res0_0 = vget_high_s16(res6);
    res1_0 = vget_high_s16(res7);

    tres = vtrn_s16(res0_0, res1_0);
    l1 = vcombine_s32(vreinterpret_s32_s16(tres.val[0]),vreinterpret_s32_s16(tres.val[1]));

    tres2 = vtrnq_s32(l0, l1);

    //combine into 4-7
    int16x8_t transp_res4 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[0]), vget_low_s32(tres2.val[0])));
    int16x8_t transp_res5 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[0]), vget_high_s32(tres2.val[0])));
    int16x8_t transp_res6 =  vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(tres1.val[1]), vget_low_s32(tres2.val[1])));
    int16x8_t transp_res7 =  vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(tres1.val[1]), vget_high_s32(tres2.val[1])));

    //special hack for vqdmulhq_s16 command that is producing -1 instead of 0
    // STORE_DESCALED(addr, reg, mul_addr): loads 8 scale factors from mul_addr,
    // remembers which lanes of reg are negative (mask), scales |reg| as
    // (2*|reg| doubled-multiplied by the factor) >> 16, i.e. (|reg|*factor) >> 14,
    // restores the sign through (x ^ mask) - mask and stores 8 shorts at addr.
    // Scaling the absolute value makes small negative inputs round to 0, not -1.
    // The macro uses the locals z, postscale_line and mask declared right after
    // it, overwrites reg, and is not #undef'ed at the end of the function.
#define STORE_DESCALED(addr, reg, mul_addr)            postscale_line = vld1q_s16((mul_addr)); \
mask = vreinterpretq_s16_u16(vcltq_s16((reg), z)); \
reg = vabsq_s16(reg); \
reg = vqdmulhq_s16(vqaddq_s16((reg), (reg)), postscale_line); \
reg = vsubq_s16(veorq_s16(reg, mask), mask); \
vst1q_s16((addr), reg);

    int16x8_t z = vdupq_n_s16(0), postscale_line, mask;

    // pass 2: process columns
    // (same butterfly network as pass 1, applied to the transposed registers)
    x0 = transp_res0;    x1 = transp_res7;
    x2 = transp_res3;    x3 = transp_res4;

    x4 = vaddq_s16(x0, x1);   x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);

    t1 = x0; t2 = x2;

    x2 = vaddq_s16(x4, x1);    x4 = vsubq_s16(x4, x1);

    x0 = transp_res1;
    x3 = transp_res6;

    x1 = vaddq_s16(x0, x3);    x0 = vsubq_s16(x0, x3);

    t3 = x0;

    x0 = transp_res2; x3 = transp_res5;

    t4 = vsubq_s16(x0, x3);

    x0 = vaddq_s16(x0, x3);

    x3 = vaddq_s16(x0, x1);    x0 = vsubq_s16(x0, x1);
    x1 = vaddq_s16(x2, x3);    x2 = vsubq_s16(x2, x3);

    // output rows 0 and 4
    STORE_DESCALED(dst, x1, postscale);
    STORE_DESCALED(dst + 4*8, x2, postscale + 4*8);

    x0 = vqdmulhq_n_s16(vsubq_s16(x0, x4), (short)(C0_707*2));

    x1 = vaddq_s16(x4, x0);    x4 = vsubq_s16(x4, x0);

    // output rows 2 and 6
    STORE_DESCALED(dst + 2*8, x4,postscale + 2*8);
    STORE_DESCALED(dst + 6*8, x1,postscale + 6*8);

    x0 = t2; x1 = t4;
    x2 = t3; x3 = t1;

    x0 = vaddq_s16(x0, x1);    x1 = vaddq_s16(x1, x2);    x2 = vaddq_s16(x2, x3);

    x1 =vqdmulhq_n_s16(x1, (short)(C0_707*2));

    x4 = vaddq_s16(x1, x3);    x3 = vsubq_s16(x3, x1);

    x1 = vqdmulhq_n_s16(vsubq_s16(x0, x2), (short)(C0_382*2));
    x0 = vaddq_s16(vqdmulhq_n_s16(x0, (short)(C0_541*2)), x1);
    x2 = vaddq_s16(vshlq_n_s16(vqdmulhq_n_s16(x2, (short)C1_306), 1), x1);

    x1 = vaddq_s16(x0, x3);    x3 = vsubq_s16(x3, x0);
    x0 = vaddq_s16(x4, x2);    x4 = vsubq_s16(x4, x2);

    // output rows 5, 1, 7 and 3
    STORE_DESCALED(dst + 5*8, x1,postscale + 5*8);
    STORE_DESCALED(dst + 1*8, x0,postscale + 1*8);
    STORE_DESCALED(dst + 7*8, x4,postscale + 7*8);
    STORE_DESCALED(dst + 3*8, x3,postscale + 3*8);
}
1001
1002#else
1003// FDCT with postscaling
1004static void aan_fdct8x8( const short *src, short *dst,
1005                        int step, const short *postscale )
1006{
1007    int workspace[64], *work = workspace;
1008    int  i;
1009
1010    // Pass 1: process rows
1011    for( i = 8; i > 0; i--, src += step, work += 8 )
1012    {
1013        int x0 = src[0], x1 = src[7];
1014        int x2 = src[3], x3 = src[4];
1015
1016        int x4 = x0 + x1; x0 -= x1;
1017        x1 = x2 + x3; x2 -= x3;
1018
1019        work[7] = x0; work[1] = x2;
1020        x2 = x4 + x1; x4 -= x1;
1021
1022        x0 = src[1]; x3 = src[6];
1023        x1 = x0 + x3; x0 -= x3;
1024        work[5] = x0;
1025
1026        x0 = src[2]; x3 = src[5];
1027        work[3] = x0 - x3; x0 += x3;
1028
1029        x3 = x0 + x1; x0 -= x1;
1030        x1 = x2 + x3; x2 -= x3;
1031
1032        work[0] = x1; work[4] = x2;
1033
1034        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
1035        x1 = x4 + x0; x4 -= x0;
1036        work[2] = x4; work[6] = x1;
1037
1038        x0 = work[1]; x1 = work[3];
1039        x2 = work[5]; x3 = work[7];
1040
1041        x0 += x1; x1 += x2; x2 += x3;
1042        x1 = DCT_DESCALE(x1*C0_707, fixb);
1043
1044        x4 = x1 + x3; x3 -= x1;
1045        x1 = (x0 - x2)*C0_382;
1046        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
1047        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
1048
1049        x1 = x0 + x3; x3 -= x0;
1050        x0 = x4 + x2; x4 -= x2;
1051
1052        work[5] = x1; work[1] = x0;
1053        work[7] = x4; work[3] = x3;
1054    }
1055
1056    work = workspace;
1057    // pass 2: process columns
1058    for( i = 8; i > 0; i--, work++, postscale += 8, dst += 8 )
1059    {
1060        int  x0 = work[8*0], x1 = work[8*7];
1061        int  x2 = work[8*3], x3 = work[8*4];
1062
1063        int  x4 = x0 + x1; x0 -= x1;
1064        x1 = x2 + x3; x2 -= x3;
1065
1066        work[8*7] = x0; work[8*0] = x2;
1067        x2 = x4 + x1; x4 -= x1;
1068
1069        x0 = work[8*1]; x3 = work[8*6];
1070        x1 = x0 + x3; x0 -= x3;
1071        work[8*4] = x0;
1072
1073        x0 = work[8*2]; x3 = work[8*5];
1074        work[8*3] = x0 - x3; x0 += x3;
1075
1076        x3 = x0 + x1; x0 -= x1;
1077        x1 = x2 + x3; x2 -= x3;
1078
1079        dst[0] = (short)DCT_DESCALE(x1*postscale[0], postshift);
1080        dst[4] = (short)DCT_DESCALE(x2*postscale[4], postshift);
1081
1082        x0 = DCT_DESCALE((x0 - x4)*C0_707, fixb);
1083        x1 = x4 + x0; x4 -= x0;
1084
1085        dst[2] = (short)DCT_DESCALE(x4*postscale[2], postshift);
1086        dst[6] = (short)DCT_DESCALE(x1*postscale[6], postshift);
1087
1088        x0 = work[8*0]; x1 = work[8*3];
1089        x2 = work[8*4]; x3 = work[8*7];
1090
1091        x0 += x1; x1 += x2; x2 += x3;
1092        x1 = DCT_DESCALE(x1*C0_707, fixb);
1093
1094        x4 = x1 + x3; x3 -= x1;
1095        x1 = (x0 - x2)*C0_382;
1096        x0 = DCT_DESCALE(x0*C0_541 + x1, fixb);
1097        x2 = DCT_DESCALE(x2*C1_306 + x1, fixb);
1098
1099        x1 = x0 + x3; x3 -= x0;
1100        x0 = x4 + x2; x4 -= x2;
1101
1102        dst[5] = (short)DCT_DESCALE(x1*postscale[5], postshift);
1103        dst[1] = (short)DCT_DESCALE(x0*postscale[1], postshift);
1104        dst[7] = (short)DCT_DESCALE(x4*postscale[7], postshift);
1105        dst[3] = (short)DCT_DESCALE(x3*postscale[3], postshift);
1106    }
1107}
1108#endif
1109
1110void MotionJpegWriter::writeFrameData( const uchar* data, int step, int colorspace, int input_channels )
1111{
1112    //double total_cvt = 0, total_dct = 0;
1113    static bool init_cat_table = false;
1114    const int CAT_TAB_SIZE = 4096;
1115    static uchar cat_table[CAT_TAB_SIZE*2+1];
1116    if( !init_cat_table )
1117    {
1118        for( int i = -CAT_TAB_SIZE; i <= CAT_TAB_SIZE; i++ )
1119        {
1120            Cv32suf a;
1121            a.f = (float)i;
1122            cat_table[i+CAT_TAB_SIZE] = ((a.i >> 23) & 255) - (126 & (i ? -1 : 0));
1123        }
1124        init_cat_table = true;
1125    }
1126
1127    //double total_dct = 0, total_cvt = 0;
1128    CV_Assert( data && width > 0 && height > 0 );
1129
1130    // encode the header and tables
1131    // for each mcu:
1132    //   convert rgb to yuv with downsampling (if color).
1133    //   for every block:
1134    //     calc dct and quantize
1135    //     encode block.
1136    int x, y;
1137    int i, j;
1138    const int max_quality = 12;
1139    short fdct_qtab[2][64];
1140    unsigned huff_dc_tab[2][16];
1141    unsigned huff_ac_tab[2][256];
1142
1143    int  x_scale = channels > 1 ? 2 : 1, y_scale = x_scale;
1144    int  dc_pred[] = { 0, 0, 0 };
1145    int  x_step = x_scale * 8;
1146    int  y_step = y_scale * 8;
1147    short  block[6][64];
1148    short  buffer[4096];
1149    int*   hbuffer = (int*)buffer;
1150    int  luma_count = x_scale*y_scale;
1151    int  block_count = luma_count + channels - 1;
1152    int  Y_step = x_scale*8;
1153    const int UV_step = 16;
1154    int u_plane_ofs = step*height;
1155    int v_plane_ofs = u_plane_ofs + step*height;
1156    double _quality = quality*0.01*max_quality;
1157
1158    if( _quality < 1. ) _quality = 1.;
1159    if( _quality > max_quality ) _quality = max_quality;
1160
1161    double inv_quality = 1./_quality;
1162
1163    // Encode header
1164    strm.putBytes( (const uchar*)jpegHeader, sizeof(jpegHeader) - 1 );
1165
1166    // Encode quantization tables
1167    for( i = 0; i < (channels > 1 ? 2 : 1); i++ )
1168    {
1169        const uchar* qtable = i == 0 ? jpegTableK1_T : jpegTableK2_T;
1170        int chroma_scale = i > 0 ? luma_count : 1;
1171
1172        strm.jputShort( 0xffdb );   // DQT marker
1173        strm.jputShort( 2 + 65*1 ); // put single qtable
1174        strm.putByte( 0*16 + i );   // 8-bit table
1175
1176        // put coefficients
1177        for( j = 0; j < 64; j++ )
1178        {
1179            int idx = zigzag[j];
1180            int qval = cvRound(qtable[idx]*inv_quality);
1181            if( qval < 1 )
1182                qval = 1;
1183            if( qval > 255 )
1184                qval = 255;
1185            fdct_qtab[i][idx] = (short)(cvRound((1 << (postshift + 11)))/
1186                                (qval*chroma_scale*idct_prescale[idx]));
1187            strm.putByte( qval );
1188        }
1189    }
1190
1191    // Encode huffman tables
1192    for( i = 0; i < (channels > 1 ? 4 : 2); i++ )
1193    {
1194        const uchar* htable = i == 0 ? jpegTableK3 : i == 1 ? jpegTableK5 :
1195        i == 2 ? jpegTableK4 : jpegTableK6;
1196        int is_ac_tab = i & 1;
1197        int idx = i >= 2;
1198        int tableSize = 16 + (is_ac_tab ? 162 : 12);
1199
1200        strm.jputShort( 0xFFC4 );      // DHT marker
1201        strm.jputShort( 3 + tableSize ); // define one huffman table
1202        strm.putByte( is_ac_tab*16 + idx ); // put DC/AC flag and table index
1203        strm.putBytes( htable, tableSize ); // put table
1204
1205        BitStream::createEncodeHuffmanTable( BitStream::createSourceHuffmanTable(
1206                                            htable, hbuffer, 16, 9 ), is_ac_tab ? huff_ac_tab[idx] :
1207                                            huff_dc_tab[idx], is_ac_tab ? 256 : 16 );
1208    }
1209
1210    // put frame header
1211    strm.jputShort( 0xFFC0 );          // SOF0 marker
1212    strm.jputShort( 8 + 3*channels );  // length of frame header
1213    strm.putByte( 8 );               // sample precision
1214    strm.jputShort( height );
1215    strm.jputShort( width );
1216    strm.putByte( channels );        // number of components
1217
1218    for( i = 0; i < channels; i++ )
1219    {
1220        strm.putByte( i + 1 );  // (i+1)-th component id (Y,U or V)
1221        if( i == 0 )
1222            strm.putByte(x_scale*16 + y_scale); // chroma scale factors
1223        else
1224            strm.putByte(1*16 + 1);
1225        strm.putByte( i > 0 ); // quantization table idx
1226    }
1227
1228    // put scan header
1229    strm.jputShort( 0xFFDA );          // SOS marker
1230    strm.jputShort( 6 + 2*channels );  // length of scan header
1231    strm.putByte( channels );          // number of components in the scan
1232
1233    for( i = 0; i < channels; i++ )
1234    {
1235        strm.putByte( i+1 );             // component id
1236        strm.putByte( (i>0)*16 + (i>0) );// selection of DC & AC tables
1237    }
1238
1239    strm.jputShort(0*256 + 63); // start and end of spectral selection - for
1240    // sequential DCT start is 0 and end is 63
1241
1242    strm.putByte( 0 );  // successive approximation bit position
1243    // high & low - (0,0) for sequential DCT
1244    unsigned currval = 0, code = 0, tempval = 0;
1245    int bit_idx = 32;
1246
1247#define JPUT_BITS(val, bits) \
1248    bit_idx -= (bits); \
1249    tempval = (val) & bit_mask[(bits)]; \
1250    if( bit_idx <= 0 ) \
1251    {  \
1252        strm.jput(currval | ((unsigned)tempval >> -bit_idx)); \
1253        bit_idx += 32; \
1254        currval = bit_idx < 32 ? (tempval << bit_idx) : 0; \
1255    } \
1256    else \
1257        currval |= (tempval << bit_idx)
1258
1259#define JPUT_HUFF(val, table) \
1260    code = table[(val) + 2]; \
1261    JPUT_BITS(code >> 8, (int)(code & 255))
1262
1263    // encode data
1264    for( y = 0; y < height; y += y_step, data += y_step*step )
1265    {
1266        for( x = 0; x < width; x += x_step )
1267        {
1268            int x_limit = x_step;
1269            int y_limit = y_step;
1270            const uchar* pix_data = data + x*input_channels;
1271            short* Y_data = block[0];
1272
1273            if( x + x_limit > width ) x_limit = width - x;
1274            if( y + y_limit > height ) y_limit = height - y;
1275
1276            memset( block, 0, block_count*64*sizeof(block[0][0]));
1277
1278            if( channels > 1 )
1279            {
1280                short* UV_data = block[luma_count];
1281                // double t = (double)cv::getTickCount();
1282
1283                if( colorspace == COLORSPACE_YUV444P && y_limit == 16 && x_limit == 16 )
1284                {
1285                    for( i = 0; i < y_limit; i += 2, pix_data += step*2, Y_data += Y_step*2, UV_data += UV_step )
1286                    {
1287#ifdef WITH_NEON
1288                        {
1289                            uint16x8_t masklo = vdupq_n_u16(255);
1290                            uint16x8_t lane = vld1q_u16((unsigned short*)(pix_data+v_plane_ofs));
1291                            uint16x8_t t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1292                            lane = vld1q_u16((unsigned short*)(pix_data + v_plane_ofs + step));
1293                            uint16x8_t t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1294                            t1 = vaddq_u16(t1, t2);
1295                            vst1q_s16(UV_data, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1296
1297                            lane = vld1q_u16((unsigned short*)(pix_data+u_plane_ofs));
1298                            t1 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1299                            lane = vld1q_u16((unsigned short*)(pix_data + u_plane_ofs + step));
1300                            t2 = vaddq_u16(vshrq_n_u16(lane, 8), vandq_u16(lane, masklo));
1301                            t1 = vaddq_u16(t1, t2);
1302                            vst1q_s16(UV_data + 8, vsubq_s16(vreinterpretq_s16_u16(t1), vdupq_n_s16(128*4)));
1303                        }
1304
1305                        {
1306                            int16x8_t lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data)));
1307                            int16x8_t delta = vdupq_n_s16(128);
1308                            lane = vsubq_s16(lane, delta);
1309                            vst1q_s16(Y_data, lane);
1310
1311                            lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+8)));
1312                            lane = vsubq_s16(lane, delta);
1313                            vst1q_s16(Y_data + 8, lane);
1314
1315                            lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data+step)));
1316                            lane = vsubq_s16(lane, delta);
1317                            vst1q_s16(Y_data+Y_step, lane);
1318
1319                            lane = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pix_data + step + 8)));
1320                            lane = vsubq_s16(lane, delta);
1321                            vst1q_s16(Y_data+Y_step + 8, lane);
1322                        }
1323#else
1324                        for( j = 0; j < x_limit; j += 2, pix_data += 2 )
1325                        {
1326                            Y_data[j] = pix_data[0] - 128;
1327                            Y_data[j+1] = pix_data[1] - 128;
1328                            Y_data[j+Y_step] = pix_data[step] - 128;
1329                            Y_data[j+Y_step+1] = pix_data[step+1] - 128;
1330
1331                            UV_data[j>>1] = pix_data[v_plane_ofs] + pix_data[v_plane_ofs+1] +
1332                                pix_data[v_plane_ofs+step] + pix_data[v_plane_ofs+step+1] - 128*4;
1333                            UV_data[(j>>1)+8] = pix_data[u_plane_ofs] + pix_data[u_plane_ofs+1] +
1334                                pix_data[u_plane_ofs+step] + pix_data[u_plane_ofs+step+1] - 128*4;
1335
1336                        }
1337
1338                        pix_data -= x_limit*input_channels;
1339#endif
1340                    }
1341                }
1342                else
1343                {
1344                    for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1345                    {
1346                        for( j = 0; j < x_limit; j++, pix_data += input_channels )
1347                        {
1348                            int Y, U, V;
1349
1350                            if( colorspace == COLORSPACE_BGR )
1351                            {
1352                                int r = pix_data[2];
1353                                int g = pix_data[1];
1354                                int b = pix_data[0];
1355
1356                                Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1357                                U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1358                                V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1359                            }
1360                            else if( colorspace == COLORSPACE_RGBA )
1361                            {
1362                                int r = pix_data[0];
1363                                int g = pix_data[1];
1364                                int b = pix_data[2];
1365
1366                                Y = DCT_DESCALE( r*y_r + g*y_g + b*y_b, fixc) - 128;
1367                                U = DCT_DESCALE( r*cb_r + g*cb_g + b*cb_b, fixc );
1368                                V = DCT_DESCALE( r*cr_r + g*cr_g + b*cr_b, fixc );
1369                            }
1370                            else
1371                            {
1372                                Y = pix_data[0] - 128;
1373                                U = pix_data[v_plane_ofs] - 128;
1374                                V = pix_data[u_plane_ofs] - 128;
1375                            }
1376
1377                            int j2 = j >> (x_scale - 1);
1378                            Y_data[j] = (short)Y;
1379                            UV_data[j2] = (short)(UV_data[j2] + U);
1380                            UV_data[j2 + 8] = (short)(UV_data[j2 + 8] + V);
1381                        }
1382
1383                        pix_data -= x_limit*input_channels;
1384                        if( ((i+1) & (y_scale - 1)) == 0 )
1385                        {
1386                            UV_data += UV_step;
1387                        }
1388                    }
1389                }
1390
1391                // total_cvt += (double)cv::getTickCount() - t;
1392            }
1393            else
1394            {
1395                for( i = 0; i < y_limit; i++, pix_data += step, Y_data += Y_step )
1396                {
1397                    for( j = 0; j < x_limit; j++ )
1398                        Y_data[j] = (short)(pix_data[j]*4 - 128*4);
1399                }
1400            }
1401
1402            for( i = 0; i < block_count; i++ )
1403            {
1404                int is_chroma = i >= luma_count;
1405                int src_step = x_scale * 8;
1406                int run = 0, val;
1407                const short* src_ptr = block[i & -2] + (i & 1)*8;
1408                const unsigned* htable = huff_ac_tab[is_chroma];
1409
1410                //double t = (double)cv::getTickCount();
1411                aan_fdct8x8( src_ptr, buffer, src_step, fdct_qtab[is_chroma] );
1412                //total_dct += (double)cv::getTickCount() - t;
1413
1414                j = is_chroma + (i > luma_count);
1415                val = buffer[0] - dc_pred[j];
1416                dc_pred[j] = buffer[0];
1417
1418                {
1419                    int cat = cat_table[val + CAT_TAB_SIZE];
1420
1421                    //CV_Assert( cat <= 11 );
1422                    JPUT_HUFF( cat, huff_dc_tab[is_chroma] );
1423                    JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
1424                }
1425
1426                for( j = 1; j < 64; j++ )
1427                {
1428                    val = buffer[zigzag[j]];
1429
1430                    if( val == 0 )
1431                    {
1432                        run++;
1433                    }
1434                    else
1435                    {
1436                        while( run >= 16 )
1437                        {
1438                            JPUT_HUFF( 0xF0, htable ); // encode 16 zeros
1439                            run -= 16;
1440                        }
1441
1442                        {
1443                            int cat = cat_table[val + CAT_TAB_SIZE];
1444                            //CV_Assert( cat <= 10 );
1445                            JPUT_HUFF( cat + run*16, htable );
1446                            JPUT_BITS( val - (val < 0 ? 1 : 0), cat );
1447                        }
1448
1449                        run = 0;
1450                    }
1451                }
1452
1453                if( run )
1454                {
1455                    JPUT_HUFF( 0x00, htable ); // encode EOB
1456                }
1457            }
1458        }
1459    }
1460
1461    // Flush
1462    strm.jflush(currval, bit_idx);
1463    strm.jputShort( 0xFFD9 ); // EOI marker
1464    /*printf("total dct = %.1fms, total cvt = %.1fms\n",
1465     total_dct*1000./cv::getTickFrequency(),
1466     total_cvt*1000./cv::getTickFrequency());*/
1467    size_t pos = strm.getPos();
1468    size_t pos1 = (pos + 3) & ~3;
1469    for( ; pos < pos1; pos++ )
1470        strm.putByte(0);
1471}
1472
1473}
1474
1475Ptr<IVideoWriter> createMotionJpegWriter( const String& filename, double fps, Size frameSize, bool iscolor )
1476{
1477    Ptr<IVideoWriter> iwriter = makePtr<mjpeg::MotionJpegWriter>(filename, fps, frameSize, iscolor);
1478    if( !iwriter->isOpened() )
1479        iwriter.release();
1480    return iwriter;
1481}
1482
1483}
1484