h264.c revision 1d80c6a8d34f59543f7df1963c22d7efa292bcb0
1/*
2 *  Copyright (C) 2012 Intel Corporation.  All Rights Reserved.
3 *
4 *  This is free software; you can redistribute it and/or modify
5 *  it under the terms of the GNU General Public License as published by
6 *  the Free Software Foundation; either version 2 of the License, or
7 *  (at your option) any later version.
8 *
9 *  This software is distributed in the hope that it will be useful,
10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 *  GNU General Public License for more details.
13 *
14 *  You should have received a copy of the GNU General Public License
15 *  along with this software; if not, write to the Free Software
16 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
17 *  USA.
18 */
19
20#ifdef LIBVNCSERVER_CONFIG_LIBVA
21
22#include <X11/Xlib.h>
23#include <va/va_x11.h>
24
/*
 * Slice type codes understood by this decoder.  h264_decode_frame()
 * compares the slice_type it is handed against these values.
 */
enum _slice_types {
    SLICE_TYPE_P = 0,   /* predicted slice */
    SLICE_TYPE_B = 1,   /* bi-predicted slice */
    SLICE_TYPE_I = 2    /* intra-coded slice */
};
30
31#define SURFACE_NUM     7
32
33VADisplay       va_dpy = NULL;
34VAConfigID      va_config_id;
35VASurfaceID     va_surface_id[SURFACE_NUM];
36VAContextID     va_context_id = 0;
37
38VABufferID      va_pic_param_buf_id[SURFACE_NUM];
39VABufferID      va_mat_param_buf_id[SURFACE_NUM];
40VABufferID      va_sp_param_buf_id[SURFACE_NUM];
41VABufferID      va_d_param_buf_id[SURFACE_NUM];
42
43static int cur_height = 0;
44static int cur_width = 0;
45static unsigned int num_frames = 0;
46static int sid = 0;
47static unsigned int frame_id = 0;
48static int field_order_count = 0;
49static VASurfaceID curr_surface = VA_INVALID_ID;
50
51VAStatus gva_status;
52VASurfaceStatus gsurface_status;
/*
 * Debug aid: query the status of surface X and print it when the surface
 * is not VASurfaceReady.  The result is stored in the globals gva_status
 * and gsurface_status.
 *
 * The body is wrapped in do { } while (0) so the macro behaves like a
 * single statement (safe inside an unbraced if/else), and the surface
 * status is only inspected when the query itself succeeded, so a stale
 * value is never reported.
 */
#define CHECK_SURF(X) \
    do { \
        gva_status = vaQuerySurfaceStatus(va_dpy, (X), &gsurface_status); \
        if (gva_status == VA_STATUS_SUCCESS && gsurface_status != VASurfaceReady) \
            printf("ss: %d\n", gsurface_status); \
    } while (0)
56
57#ifdef _DEBUG
58#define DebugLog(A) rfbClientLog A
59#else
60#define DebugLog(A)
61#endif
62
/*
 * Abort the process with an error message when a libva call failed.
 *
 *   va_status  status value returned by the libva call
 *   func       string naming the call, used in the message
 *
 * On success only a debug message is emitted (when DebugLog is enabled).
 * The do { } while (0) wrapper makes the macro a single statement, so it
 * can be used in an unbraced if/else without capturing the caller's else.
 */
#define CHECK_VASTATUS(va_status,func)                                      \
    do {                                                                    \
        if ((va_status) != VA_STATUS_SUCCESS) {                             \
            rfbClientErr("%s:%s:%d failed (0x%x),exit\n", __func__, func, __LINE__, va_status); \
            exit(1);                                                        \
        } else {                                                            \
            DebugLog(("%s:%s:%d success\n", __func__, func, __LINE__));     \
        }                                                                   \
    } while (0)
72
73/*
74 * Forward declarations
75 */
76static void h264_decode_frame(int f_width, int f_height, char *framedata, int framesize, int slice_type);
77static void SetVAPictureParameterBufferH264(VAPictureParameterBufferH264 *p, int width, int height);
78static void SetVASliceParameterBufferH264(VASliceParameterBufferH264 *p);
79static void SetVASliceParameterBufferH264_Intra(VASliceParameterBufferH264 *p, int first);
80
81static void put_updated_rectangle(rfbClient *client, int x, int y, int width, int height, int f_width, int f_height, int first_for_frame);
82static void nv12_to_rgba(const VAImage vaImage, rfbClient *client, int ch_x, int ch_y, int ch_w, int ch_h);
83
84
85/* FIXME: get this value from the server instead of hardcoding 32bit pixels */
86#define BPP (4 * 8)
87
/*
 * Turn a FOURCC code into a NUL-terminated four-character string, least
 * significant byte first.
 *
 * The result lives in one of two static buffers that are used
 * alternately, so the strings from two consecutive calls can be used at
 * the same time (e.g. in one log statement); a third call overwrites the
 * first result.
 */
static const char *string_of_FOURCC(uint32_t fourcc)
{
    static char text[2][5];
    static int slot;
    char *out;
    int n;

    /* switch to the other buffer before writing */
    slot ^= 1;
    out = text[slot];

    for (n = 0; n < 4; ++n) {
        out[n] = fourcc >> (8 * n);
    }
    out[4] = '\0';

    return out;
}
101
102static inline const char *string_of_VAImageFormat(VAImageFormat *imgfmt)
103{
104    return string_of_FOURCC(imgfmt->fourcc);
105}
106
107
108static rfbBool
109HandleH264 (rfbClient* client, int rx, int ry, int rw, int rh)
110{
111    rfbH264Header hdr;
112    char *framedata;
113
114    DebugLog(("Framebuffer update with H264 (x: %d, y: %d, w: %d, h: %d)\n", rx, ry, rw, rh));
115
116    /* First, read the frame size and allocate buffer to store the data */
117    if (!ReadFromRFBServer(client, (char *)&hdr, sz_rfbH264Header))
118        return FALSE;
119
120    hdr.slice_type = rfbClientSwap32IfLE(hdr.slice_type);
121    hdr.nBytes = rfbClientSwap32IfLE(hdr.nBytes);
122    hdr.width = rfbClientSwap32IfLE(hdr.width);
123    hdr.height = rfbClientSwap32IfLE(hdr.height);
124
125    framedata = (char*) malloc(hdr.nBytes);
126
127    /* Obtain frame data from the server */
128    DebugLog(("Reading %d bytes of frame data (type: %d)\n", hdr.nBytes, hdr.slice_type));
129    if (!ReadFromRFBServer(client, framedata, hdr.nBytes))
130        return FALSE;
131
132    /* First make sure we have a large enough raw buffer to hold the
133     * decompressed data.  In practice, with a fixed BPP, fixed frame
134     * buffer size and the first update containing the entire frame
135     * buffer, this buffer allocation should only happen once, on the
136     * first update.
137     */
138    if ( client->raw_buffer_size < (( rw * rh ) * ( BPP / 8 ))) {
139        if ( client->raw_buffer != NULL ) {
140            free( client->raw_buffer );
141        }
142
143        client->raw_buffer_size = (( rw * rh ) * ( BPP / 8 ));
144        client->raw_buffer = (char*) malloc( client->raw_buffer_size );
145        rfbClientLog("Allocated raw buffer of %d bytes (%dx%dx%d BPP)\n", client->raw_buffer_size, rw, rh, BPP);
146    }
147
148    /* Decode frame if frame data was sent. Server only sends frame data for the first
149     * framebuffer update message for a particular frame buffer contents.
150     * If more than 1 rectangle is updated, the messages after the first one (with
151     * the H.264 frame) have nBytes == 0.
152     */
153    if (hdr.nBytes > 0) {
154        DebugLog(("  decoding %d bytes of H.264 data\n", hdr.nBytes));
155        h264_decode_frame(hdr.width, hdr.height, framedata, hdr.nBytes, hdr.slice_type);
156    }
157
158    DebugLog(("  updating rectangle (%d, %d)-(%d, %d)\n", rx, ry, rw, rh));
159    put_updated_rectangle(client, rx, ry, rw, rh, hdr.width, hdr.height, hdr.nBytes != 0);
160
161    free(framedata);
162
163    return TRUE;
164}
165
166static void h264_cleanup_decoder()
167{
168    VAStatus va_status;
169
170    rfbClientLog("%s()\n", __FUNCTION__);
171
172    if (va_surface_id[0] != VA_INVALID_ID) {
173        va_status = vaDestroySurfaces(va_dpy, &va_surface_id[0], SURFACE_NUM);
174        CHECK_VASTATUS(va_status, "vaDestroySurfaces");
175    }
176
177    if (va_context_id) {
178        va_status = vaDestroyContext(va_dpy, va_context_id);
179        CHECK_VASTATUS(va_status, "vaDestroyContext");
180        va_context_id = 0;
181    }
182
183    num_frames = 0;
184    sid = 0;
185    frame_id = 0;
186    field_order_count = 0;
187}
188
189static void h264_init_decoder(int width, int height)
190{
191    VAStatus va_status;
192
193    if (va_context_id) {
194        rfbClientLog("%s: va_dpy already initialized\n", __FUNCTION__);
195    }
196
197    if (va_dpy != NULL) {
198        rfbClientLog("%s: Re-initializing H.264 decoder\n", __FUNCTION__);
199    }
200    else {
201        rfbClientLog("%s: initializing H.264 decoder\n", __FUNCTION__);
202
203        /* Attach VA display to local X display */
204        Display *win_display = (Display *)XOpenDisplay(":0.0");
205        if (win_display == NULL) {
206            rfbClientErr("Can't connect to local display\n");
207            exit(-1);
208        }
209
210        int major_ver, minor_ver;
211        va_dpy = vaGetDisplay(win_display);
212        va_status = vaInitialize(va_dpy, &major_ver, &minor_ver);
213        CHECK_VASTATUS(va_status, "vaInitialize");
214        rfbClientLog("%s: libva version %d.%d found\n", __FUNCTION__, major_ver, minor_ver);
215    }
216
217    /* Check for VLD entrypoint */
218    int num_entrypoints;
219    VAEntrypoint    entrypoints[5];
220    int vld_entrypoint_found = 0;
221
222    /* Change VAProfileH264High if needed */
223    VAProfile profile = VAProfileH264High;
224    va_status = vaQueryConfigEntrypoints(va_dpy, profile, entrypoints, &num_entrypoints);
225    CHECK_VASTATUS(va_status, "vaQueryConfigEntrypoints");
226    int i;
227    for (i = 0; i < num_entrypoints; ++i) {
228        if (entrypoints[i] == VAEntrypointVLD) {
229            vld_entrypoint_found = 1;
230            break;
231        }
232    }
233
234    if (vld_entrypoint_found == 0) {
235        rfbClientErr("VLD entrypoint not found\n");
236        exit(1);
237    }
238
239    /* Create configuration for the decode pipeline */
240    VAConfigAttrib attrib;
241    attrib.type = VAConfigAttribRTFormat;
242    va_status = vaCreateConfig(va_dpy, profile, VAEntrypointVLD, &attrib, 1, &va_config_id);
243    CHECK_VASTATUS(va_status, "vaCreateConfig");
244
245    /* Create VA surfaces */
246    for (i = 0; i < SURFACE_NUM; ++i) {
247        va_surface_id[i]       = VA_INVALID_ID;
248        va_pic_param_buf_id[i] = VA_INVALID_ID;
249        va_mat_param_buf_id[i] = VA_INVALID_ID;
250        va_sp_param_buf_id[i]  = VA_INVALID_ID;
251        va_d_param_buf_id[i]   = VA_INVALID_ID;
252    }
253    va_status = vaCreateSurfaces(va_dpy, width, height, VA_RT_FORMAT_YUV420, SURFACE_NUM, &va_surface_id[0]);
254    CHECK_VASTATUS(va_status, "vaCreateSurfaces");
255    for (i = 0; i < SURFACE_NUM; ++i) {
256        DebugLog(("%s: va_surface_id[%d] = %p\n", __FUNCTION__, i, va_surface_id[i]));
257    }
258
259    /* Create VA context */
260    va_status = vaCreateContext(va_dpy, va_config_id, width, height, 0/*VA_PROGRESSIVE*/,  &va_surface_id[0], SURFACE_NUM, &va_context_id);
261    CHECK_VASTATUS(va_status, "vaCreateContext");
262    DebugLog(("%s: VA context created (id: %d)\n", __FUNCTION__, va_context_id));
263
264
265    /* Instantiate decode pipeline */
266    va_status = vaBeginPicture(va_dpy, va_context_id, va_surface_id[0]);
267    CHECK_VASTATUS(va_status, "vaBeginPicture");
268
269    rfbClientLog("%s: H.264 decoder initialized\n", __FUNCTION__);
270}
271
272static void h264_decode_frame(int f_width, int f_height, char *framedata, int framesize, int slice_type)
273{
274    VAStatus va_status;
275
276    DebugLog(("%s: called for frame of %d bytes (%dx%d) slice_type=%d\n", __FUNCTION__, framesize, width, height, slice_type));
277
278    /* Initialize decode pipeline if necessary */
279    if ( (f_width > cur_width) || (f_height > cur_height) ) {
280        if (va_dpy != NULL)
281            h264_cleanup_decoder();
282        cur_width = f_width;
283        cur_height = f_height;
284
285        h264_init_decoder(f_width, f_height);
286        rfbClientLog("%s: decoder initialized\n", __FUNCTION__);
287    }
288
289    /* Decode frame */
290    static VAPictureH264 va_picture_h264, va_old_picture_h264;
291
292    /* The server should always send an I-frame when a new client connects
293     * or when the resolution of the framebuffer changes, but we check
294     * just in case.
295     */
296    if ( (slice_type != SLICE_TYPE_I) && (num_frames == 0) ) {
297        rfbClientLog("First frame is not an I frame !!! Skipping!!!\n");
298        return;
299    }
300
301    DebugLog(("%s: frame_id=%d va_surface_id[%d]=0x%x field_order_count=%d\n", __FUNCTION__, frame_id, sid, va_surface_id[sid], field_order_count));
302
303    va_picture_h264.picture_id = va_surface_id[sid];
304    va_picture_h264.frame_idx  = frame_id;
305    va_picture_h264.flags = 0;
306    va_picture_h264.BottomFieldOrderCnt = field_order_count;
307    va_picture_h264.TopFieldOrderCnt = field_order_count;
308
309    /* Set up picture parameter buffer */
310    if (va_pic_param_buf_id[sid] == VA_INVALID_ID) {
311        va_status = vaCreateBuffer(va_dpy, va_context_id, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferH264), 1, NULL, &va_pic_param_buf_id[sid]);
312        CHECK_VASTATUS(va_status, "vaCreateBuffer(PicParam)");
313    }
314    CHECK_SURF(va_surface_id[sid]);
315
316    VAPictureParameterBufferH264 *pic_param_buf = NULL;
317    va_status = vaMapBuffer(va_dpy, va_pic_param_buf_id[sid], (void **)&pic_param_buf);
318    CHECK_VASTATUS(va_status, "vaMapBuffer(PicParam)");
319
320    SetVAPictureParameterBufferH264(pic_param_buf, f_width, f_height);
321    memcpy(&pic_param_buf->CurrPic, &va_picture_h264, sizeof(VAPictureH264));
322
323    if (slice_type == SLICE_TYPE_P) {
324        memcpy(&pic_param_buf->ReferenceFrames[0], &va_old_picture_h264, sizeof(VAPictureH264));
325        pic_param_buf->ReferenceFrames[0].flags = 0;
326    }
327    else if (slice_type != SLICE_TYPE_I) {
328        rfbClientLog("Frame type %d not supported!!!\n");
329        return;
330    }
331    pic_param_buf->frame_num = frame_id;
332
333    va_status = vaUnmapBuffer(va_dpy, va_pic_param_buf_id[sid]);
334    CHECK_VASTATUS(va_status, "vaUnmapBuffer(PicParam)");
335
336    /* Set up IQ matrix buffer */
337    if (va_mat_param_buf_id[sid] == VA_INVALID_ID) {
338        va_status = vaCreateBuffer(va_dpy, va_context_id, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferH264), 1, NULL, &va_mat_param_buf_id[sid]);
339        CHECK_VASTATUS(va_status, "vaCreateBuffer(IQMatrix)");
340    }
341    CHECK_SURF(va_surface_id[sid]);
342
343    VAIQMatrixBufferH264 *iq_matrix_buf = NULL;
344    va_status = vaMapBuffer(va_dpy, va_mat_param_buf_id[sid], (void **)&iq_matrix_buf);
345    CHECK_VASTATUS(va_status, "vaMapBuffer(IQMatrix)");
346
347    static const unsigned char m_MatrixBufferH264[]= {
348        /* ScalingList4x4[6][16] */
349        0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
350        0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
351        0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
352        0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
353        0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
354        0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
355        /* ScalingList8x8[2][64] */
356        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
361        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
366        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
371        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
372    };
373
374    memcpy(iq_matrix_buf, m_MatrixBufferH264, 224);
375    va_status = vaUnmapBuffer(va_dpy, va_mat_param_buf_id[sid]);
376    CHECK_VASTATUS(va_status, "vaUnmapBuffer(IQMatrix)");
377
378    VABufferID buffer_ids[2];
379    buffer_ids[0] = va_pic_param_buf_id[sid];
380    buffer_ids[1] = va_mat_param_buf_id[sid];
381
382    CHECK_SURF(va_surface_id[sid]);
383    va_status = vaRenderPicture(va_dpy, va_context_id, buffer_ids, 2);
384    CHECK_VASTATUS(va_status, "vaRenderPicture");
385
386    /* Set up slice parameter buffer */
387    if (va_sp_param_buf_id[sid] == VA_INVALID_ID) {
388        va_status = vaCreateBuffer(va_dpy, va_context_id, VASliceParameterBufferType, sizeof(VASliceParameterBufferH264), 1, NULL, &va_sp_param_buf_id[sid]);
389        CHECK_VASTATUS(va_status, "vaCreateBuffer(SliceParam)");
390    }
391    CHECK_SURF(va_surface_id[sid]);
392
393    VASliceParameterBufferH264 *slice_param_buf = NULL;
394    va_status = vaMapBuffer(va_dpy, va_sp_param_buf_id[sid], (void **)&slice_param_buf);
395    CHECK_VASTATUS(va_status, "vaMapBuffer(SliceParam)");
396
397    static int t2_first = 1;
398    if (slice_type == SLICE_TYPE_I) {
399        SetVASliceParameterBufferH264_Intra(slice_param_buf, t2_first);
400        t2_first = 0;
401    } else {
402        SetVASliceParameterBufferH264(slice_param_buf);
403        memcpy(&slice_param_buf->RefPicList0[0], &va_old_picture_h264, sizeof(VAPictureH264));
404        slice_param_buf->RefPicList0[0].flags = 0;
405    }
406    slice_param_buf->slice_data_bit_offset = 0;
407    slice_param_buf->slice_data_size = framesize;
408
409    va_status = vaUnmapBuffer(va_dpy, va_sp_param_buf_id[sid]);
410    CHECK_VASTATUS(va_status, "vaUnmapBuffer(SliceParam)");
411    CHECK_SURF(va_surface_id[sid]);
412
413    /* Set up slice data buffer and copy H.264 encoded data */
414    if (va_d_param_buf_id[sid] == VA_INVALID_ID) {
415        /* TODO use estimation matching framebuffer dimensions instead of this large value */
416        va_status = vaCreateBuffer(va_dpy, va_context_id, VASliceDataBufferType, 4177920, 1, NULL, &va_d_param_buf_id[sid]); /* 1080p size */
417        CHECK_VASTATUS(va_status, "vaCreateBuffer(SliceData)");
418    }
419
420    char *slice_data_buf;
421    va_status = vaMapBuffer(va_dpy, va_d_param_buf_id[sid], (void **)&slice_data_buf);
422    CHECK_VASTATUS(va_status, "vaMapBuffer(SliceData)");
423    memcpy(slice_data_buf, framedata, framesize);
424
425    CHECK_SURF(va_surface_id[sid]);
426    va_status = vaUnmapBuffer(va_dpy, va_d_param_buf_id[sid]);
427    CHECK_VASTATUS(va_status, "vaUnmapBuffer(SliceData)");
428
429    buffer_ids[0] = va_sp_param_buf_id[sid];
430    buffer_ids[1] = va_d_param_buf_id[sid];
431
432    CHECK_SURF(va_surface_id[sid]);
433    va_status = vaRenderPicture(va_dpy, va_context_id, buffer_ids, 2);
434    CHECK_VASTATUS(va_status, "vaRenderPicture");
435
436    va_status = vaEndPicture(va_dpy, va_context_id);
437    CHECK_VASTATUS(va_status, "vaEndPicture");
438
439    /* Prepare next one... */
440    int sid_new = (sid + 1) % SURFACE_NUM;
441    DebugLog(("%s: new Surface ID = %d\n", __FUNCTION__, sid_new));
442    va_status = vaBeginPicture(va_dpy, va_context_id, va_surface_id[sid_new]);
443    CHECK_VASTATUS(va_status, "vaBeginPicture");
444
445    /* Get decoded data */
446    va_status = vaSyncSurface(va_dpy, va_surface_id[sid]);
447    CHECK_VASTATUS(va_status, "vaSyncSurface");
448    CHECK_SURF(va_surface_id[sid]);
449
450    curr_surface = va_surface_id[sid];
451
452    sid = sid_new;
453
454    field_order_count += 2;
455    ++frame_id;
456    if (frame_id > 15) {
457        frame_id = 0;
458    }
459
460    ++num_frames;
461
462    memcpy(&va_old_picture_h264, &va_picture_h264, sizeof(VAPictureH264));
463}
464
465static void put_updated_rectangle(rfbClient *client, int x, int y, int width, int height, int f_width, int f_height, int first_for_frame)
466{
467    if (curr_surface == VA_INVALID_ID) {
468        rfbClientErr("%s: called, but current surface is invalid\n", __FUNCTION__);
469        return;
470    }
471
472    VAStatus va_status;
473
474    if (client->outputWindow) {
475        /* use efficient vaPutSurface() method of putting the framebuffer on the screen */
476        if (first_for_frame) {
477            /* vaPutSurface() clears window contents outside the given destination rectangle => always update full screen. */
478            va_status = vaPutSurface(va_dpy, curr_surface, client->outputWindow, 0, 0, f_width, f_height, 0, 0, f_width, f_height, NULL, 0, VA_FRAME_PICTURE);
479            CHECK_VASTATUS(va_status, "vaPutSurface");
480        }
481    }
482    else if (client->frameBuffer) {
483        /* ... or copy the changed framebuffer region manually as a fallback */
484        VAImage decoded_image;
485        decoded_image.image_id = VA_INVALID_ID;
486        decoded_image.buf      = VA_INVALID_ID;
487        va_status = vaDeriveImage(va_dpy, curr_surface, &decoded_image);
488        CHECK_VASTATUS(va_status, "vaDeriveImage");
489
490        if ((decoded_image.image_id == VA_INVALID_ID) || (decoded_image.buf == VA_INVALID_ID)) {
491            rfbClientErr("%s: vaDeriveImage() returned success but VA image is invalid (id: %d, buf: %d)\n", __FUNCTION__, decoded_image.image_id, decoded_image.buf);
492        }
493
494        nv12_to_rgba(decoded_image, client, x, y, width, height);
495
496        va_status = vaDestroyImage(va_dpy, decoded_image.image_id);
497        CHECK_VASTATUS(va_status, "vaDestroyImage");
498    }
499}
500
501static void SetVAPictureParameterBufferH264(VAPictureParameterBufferH264 *p, int width, int height)
502{
503    int i;
504    unsigned int width_in_mbs = (width + 15) / 16;
505    unsigned int height_in_mbs = (height + 15) / 16;
506
507    memset(p, 0, sizeof(VAPictureParameterBufferH264));
508    p->picture_width_in_mbs_minus1 = width_in_mbs - 1;
509    p->picture_height_in_mbs_minus1 = height_in_mbs - 1;
510    p->num_ref_frames = 1;
511    p->seq_fields.value = 145;
512    p->pic_fields.value = 0x501;
513    for (i = 0; i < 16; i++) {
514        p->ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID;
515        p->ReferenceFrames[i].picture_id = 0xffffffff;
516    }
517}
518
519static void SetVASliceParameterBufferH264(VASliceParameterBufferH264 *p)
520{
521    int i;
522    memset(p, 0, sizeof(VASliceParameterBufferH264));
523    p->slice_data_size = 0;
524    p->slice_data_bit_offset = 64;
525    p->slice_alpha_c0_offset_div2 = 2;
526    p->slice_beta_offset_div2 = 2;
527    p->chroma_weight_l0_flag = 1;
528    p->chroma_weight_l0[0][0]=1;
529    p->chroma_offset_l0[0][0]=0;
530    p->chroma_weight_l0[0][1]=1;
531    p->chroma_offset_l0[0][1]=0;
532    p->luma_weight_l1_flag = 1;
533    p->chroma_weight_l1_flag = 1;
534    p->luma_weight_l0[0]=0x01;
535    for (i = 0; i < 32; i++) {
536        p->RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
537        p->RefPicList1[i].flags = VA_PICTURE_H264_INVALID;
538    }
539    p->RefPicList1[0].picture_id = 0xffffffff;
540}
541
542static void SetVASliceParameterBufferH264_Intra(VASliceParameterBufferH264 *p, int first)
543{
544    int i;
545    memset(p, 0, sizeof(VASliceParameterBufferH264));
546    p->slice_data_size = 0;
547    p->slice_data_bit_offset = 64;
548    p->slice_alpha_c0_offset_div2 = 2;
549    p->slice_beta_offset_div2 = 2;
550    p->slice_type = 2;
551    if (first) {
552        p->luma_weight_l0_flag = 1;
553        p->chroma_weight_l0_flag = 1;
554        p->luma_weight_l1_flag = 1;
555        p->chroma_weight_l1_flag = 1;
556    } else {
557        p->chroma_weight_l0_flag = 1;
558        p->chroma_weight_l0[0][0]=1;
559        p->chroma_offset_l0[0][0]=0;
560        p->chroma_weight_l0[0][1]=1;
561        p->chroma_offset_l0[0][1]=0;
562        p->luma_weight_l1_flag = 1;
563        p->chroma_weight_l1_flag = 1;
564        p->luma_weight_l0[0]=0x01;
565    }
566    for (i = 0; i < 32; i++) {
567        p->RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
568        p->RefPicList1[i].flags = VA_PICTURE_H264_INVALID;
569    }
570    p->RefPicList1[0].picture_id = 0xffffffff;
571    p->RefPicList0[0].picture_id = 0xffffffff;
572}
573
574static void nv12_to_rgba(const VAImage vaImage, rfbClient *client, int ch_x, int ch_y, int ch_w, int ch_h)
575{
576    DebugLog(("%s: converting region (%d, %d)-(%d, %d) from NV12->RGBA\n", __FUNCTION__, ch_x, ch_y, ch_w, ch_h));
577
578    VAStatus va_status;
579    uint8_t *nv12_buf;
580    va_status = vaMapBuffer(va_dpy, vaImage.buf, (void **)&nv12_buf);
581    CHECK_VASTATUS(va_status, "vaMapBuffer(DecodedData)");
582
583    /* adjust x, y, width, height of the affected area so
584     * x, y, width and height are always even.
585     */
586    if (ch_x % 2) { --ch_x; ++ch_w; }
587    if (ch_y % 2) { --ch_y; ++ch_h; }
588    if ((ch_x + ch_w) % 2) { ++ch_w; }
589    if ((ch_y + ch_h) % 2) { ++ch_h; }
590
591    /* point nv12_buf and dst to upper left corner of changed area */
592    uint8_t *nv12_y  = &nv12_buf[vaImage.offsets[0] + vaImage.pitches[0] * ch_y + ch_x];
593    uint8_t *nv12_uv = &nv12_buf[vaImage.offsets[1] + vaImage.pitches[1] * (ch_y / 2) + ch_x];
594    uint32_t *dst    = &((uint32_t*)client->frameBuffer)[client->width * ch_y + ch_x];
595
596    /* TODO: optimize R, G, B calculation. Possible ways to do this:
597     *       - use lookup tables
598     *       - convert from floating point to integer arithmetic
599     *       - use MMX/SSE to vectorize calculations
600     *       - use GPU (VA VPP, shader...)
601     */
602    int src_x, src_y;
603    for (src_y = 0; src_y < ch_h; src_y += 2) {
604        for (src_x = 0; src_x < ch_w; src_x += 2) {
605            uint8_t nv_u = nv12_uv[src_x];
606            uint8_t nv_v = nv12_uv[src_x + 1];
607            uint8_t nv_y[4] = { nv12_y[                     src_x], nv12_y[                     src_x + 1],
608                                nv12_y[vaImage.pitches[0] + src_x], nv12_y[vaImage.pitches[0] + src_x + 1] };
609
610        int i;
611            for (i = 0; i < 4; ++i) {
612                double R = 1.164 * (nv_y[i] - 16)                        + 1.596 * (nv_v - 128);
613                double G = 1.164 * (nv_y[i] - 16) - 0.391 * (nv_u - 128) - 0.813 * (nv_v - 128);
614                double B = 1.164 * (nv_y[i] - 16) + 2.018 * (nv_u - 128);
615
616                /* clamp R, G, B values. For some Y, U, V combinations,
617                 * the results of the above calculations fall outside of
618                 * the range 0-255.
619                 */
620                if (R < 0.0) R = 0.0;
621                if (G < 0.0) G = 0.0;
622                if (B < 0.0) B = 0.0;
623                if (R > 255.0) R = 255.0;
624                if (G > 255.0) G = 255.0;
625                if (B > 255.0) B = 255.0;
626
627                dst[client->width * (i / 2) + src_x + (i % 2)] = 0
628                               | ((unsigned int)(R + 0.5) << client->format.redShift)
629                               | ((unsigned int)(G + 0.5) << client->format.greenShift)
630                               | ((unsigned int)(B + 0.5) << client->format.blueShift);
631            }
632        }
633
634        nv12_y  += 2 * vaImage.pitches[0];
635        nv12_uv += vaImage.pitches[1];
636        dst     += 2 * client->width;
637    }
638
639    CHECK_SURF(va_surface_id[sid]);
640    va_status = vaUnmapBuffer(va_dpy, vaImage.buf);
641    CHECK_VASTATUS(va_status, "vaUnmapBuffer(DecodedData)");
642}
643
644#endif /* LIBVNCSERVER_CONFIG_LIBVA */
645