1/*
2 * Copyright (c) 2011 Intel Corporation. All Rights Reserved.
3 * Copyright (c) Imagination Technologies Limited, UK
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sub license, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial portions
15 * of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
21 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 *    Zeng Li <zeng.li@intel.com>
27 *    Shengquan Yuan  <shengquan.yuan@intel.com>
28 *    Binglin Chen <binglin.chen@intel.com>
29 *
30 */
31
32#include "lnc_cmdbuf.h"
33
34#include <unistd.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <errno.h>
38#include <string.h>
39#include <wsbm/wsbm_manager.h>
40
41#include "psb_def.h"
42#include "psb_drv_debug.h"
43#include "lnc_hostcode.h"
44#include "psb_ws_driver.h"
45
/*
 * Buffer layout (a single mapping holds both regions, commands first):
 *         cmd_base   <= cmd_idx   < CMD_END()   == reloc_base
 *         reloc_base <= reloc_idx < RELOC_END() == cmd_base + size
 */
51
/* One past the last byte of the relocation region, i.e. the end of the whole buffer.
 * The argument is parenthesized so that any pointer expression can be passed. */
#define RELOC_END(cmdbuf)     ((cmdbuf)->cmd_base + (cmdbuf)->size)

/* One past the last byte of the command region; the relocation region starts here. */
#define CMD_END(cmdbuf)       ((cmdbuf)->reloc_base)

/* Bytes reserved for relocation records (second part of the buffer) */
#define RELOC_SIZE            (0x3000)

/* Bytes reserved for commands (first part of the buffer) */
#define CMD_SIZE              (0x3000)

/* NOTE(review): the margins below are not referenced in the reviewed part of
 * this file; presumably head-room thresholds -- confirm before relying on them. */
#define RELOC_MARGIN          (0x0800)

#define CMD_MARGIN            (0x0400)


/* NOTE(review): not referenced in the reviewed part of this file. */
#define MAX_CMD_COUNT         12

/* NOTE(review): not referenced in the reviewed part of this file. */
#define MTX_SEG_SIZE          (0x0800)
68
69/*
70 * Create command buffer
71 */
72VAStatus lnc_cmdbuf_create(
73    object_context_p obj_context,
74    psb_driver_data_p driver_data,
75    lnc_cmdbuf_p cmdbuf)
76{
77    context_ENC_p ctx = (context_ENC_p) obj_context->format_data;
78    VAStatus vaStatus = VA_STATUS_SUCCESS;
79    unsigned int size = CMD_SIZE + RELOC_SIZE;
80
81    cmdbuf->size = 0;
82    cmdbuf->cmd_base = NULL;
83    cmdbuf->cmd_idx = NULL;
84    cmdbuf->reloc_base = NULL;
85    cmdbuf->reloc_idx = NULL;
86    cmdbuf->buffer_refs_count = 0;
87    cmdbuf->buffer_refs_allocated = 10;
88    cmdbuf->buffer_refs = (psb_buffer_p *) calloc(1, sizeof(psb_buffer_p) * cmdbuf->buffer_refs_allocated);
89    if (NULL == cmdbuf->buffer_refs) {
90        cmdbuf->buffer_refs_allocated = 0;
91        vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
92    }
93    if (VA_STATUS_SUCCESS == vaStatus) {
94        vaStatus = psb_buffer_create(driver_data, size, psb_bt_cpu_only, &cmdbuf->buf);
95        cmdbuf->size = size;
96    }
97
98    if (VA_STATUS_SUCCESS != vaStatus) {
99        free(cmdbuf->buffer_refs);
100        cmdbuf->buffer_refs = NULL;
101        cmdbuf->buffer_refs_allocated = 0;
102        return vaStatus;
103    }
104
105    /* create topaz parameter buffer */
106    vaStatus = psb_buffer_create(driver_data, ctx->pic_params_size, psb_bt_cpu_vpu, &cmdbuf->pic_params);
107    if (VA_STATUS_SUCCESS != vaStatus)
108        goto error_out3;
109
110    /* create header buffer */
111    vaStatus = psb_buffer_create(driver_data, ctx->header_buffer_size, psb_bt_cpu_vpu, &cmdbuf->header_mem);
112    if (VA_STATUS_SUCCESS != vaStatus)
113        goto error_out2;
114
115    /* create slice parameter buffer */
116    vaStatus = psb_buffer_create(driver_data, ctx->sliceparam_buffer_size, psb_bt_cpu_vpu, &cmdbuf->slice_params);
117    if (VA_STATUS_SUCCESS != vaStatus)
118        goto error_out1;
119
120    /* all cmdbuf share one MTX_CURRENT_IN_PARAMS since every MB has a MTX_CURRENT_IN_PARAMS structure
121     * and filling this structure for all MB is very time-consuming
122     */
123    cmdbuf->topaz_in_params_I = &ctx->topaz_in_params_I;
124    cmdbuf->topaz_in_params_P = &ctx->topaz_in_params_P;
125    cmdbuf->topaz_above_bellow_params = &ctx->topaz_above_bellow_params;
126
127    return vaStatus;
128
129error_out1:
130    psb_buffer_destroy(&cmdbuf->header_mem);
131error_out2:
132    psb_buffer_destroy(&cmdbuf->pic_params);
133error_out3:
134    lnc_cmdbuf_destroy(cmdbuf);
135
136    return vaStatus;
137}
138
139/*
140 * Destroy buffer
141 */
142void lnc_cmdbuf_destroy(lnc_cmdbuf_p cmdbuf)
143{
144    if (cmdbuf->size) {
145        psb_buffer_destroy(&cmdbuf->buf);
146        cmdbuf->size = 0;
147    }
148    if (cmdbuf->buffer_refs_allocated) {
149        free(cmdbuf->buffer_refs);
150        cmdbuf->buffer_refs = NULL;
151        cmdbuf->buffer_refs_allocated = 0;
152    }
153
154    psb_buffer_destroy(&cmdbuf->pic_params);
155    psb_buffer_destroy(&cmdbuf->header_mem);
156    psb_buffer_destroy(&cmdbuf->slice_params);
157
158}
159
160/*
161 * Reset buffer & map
162 *
163 * Returns 0 on success
164 */
165int lnc_cmdbuf_reset(lnc_cmdbuf_p cmdbuf)
166{
167    int ret;
168
169    cmdbuf->cmd_base = NULL;
170    cmdbuf->cmd_idx = NULL;
171    cmdbuf->reloc_base = NULL;
172    cmdbuf->reloc_idx = NULL;
173
174    cmdbuf->buffer_refs_count = 0;
175    cmdbuf->cmd_count = 0;
176
177    ret = psb_buffer_map(&cmdbuf->buf, &cmdbuf->cmd_base);
178    if (ret) {
179        return ret;
180    }
181
182    cmdbuf->cmd_start = cmdbuf->cmd_base;
183    cmdbuf->cmd_idx = (uint32_t *) cmdbuf->cmd_base;
184
185    cmdbuf->reloc_base = cmdbuf->cmd_base + CMD_SIZE;
186    cmdbuf->reloc_idx = (struct drm_psb_reloc *) cmdbuf->reloc_base;
187
188    /* Add ourselves to the buffer list */
189    lnc_cmdbuf_buffer_ref(cmdbuf, &cmdbuf->buf); /* cmd buf == 0 */
190    return ret;
191}
192
193/*
194 * Unmap buffer
195 *
196 * Returns 0 on success
197 */
198int lnc_cmdbuf_unmap(lnc_cmdbuf_p cmdbuf)
199{
200    cmdbuf->cmd_base = NULL;
201    cmdbuf->cmd_start = NULL;
202    cmdbuf->cmd_idx = NULL;
203    cmdbuf->reloc_base = NULL;
204    cmdbuf->reloc_idx = NULL;
205    cmdbuf->cmd_count = 0;
206    psb_buffer_unmap(&cmdbuf->buf);
207    return 0;
208}
209
210
211/*
212 * Reference an addtional buffer "buf" in the command stream
213 * Returns a reference index that can be used to refer to "buf" in
214 * relocation records, -1 on error
215 */
216int lnc_cmdbuf_buffer_ref(lnc_cmdbuf_p cmdbuf, psb_buffer_p buf)
217{
218    int item_loc = 0;
219
220    while ((item_loc < cmdbuf->buffer_refs_count) && (cmdbuf->buffer_refs[item_loc] != buf)) {
221        item_loc++;
222    }
223    if (item_loc == cmdbuf->buffer_refs_count) {
224        /* Add new entry */
225        if (item_loc >= cmdbuf->buffer_refs_allocated) {
226            /* Allocate more entries */
227            int new_size = cmdbuf->buffer_refs_allocated + 10;
228            psb_buffer_p *new_array;
229            new_array = (psb_buffer_p *) calloc(1, sizeof(psb_buffer_p) * new_size);
230            if (NULL == new_array) {
231                return -1; /* Allocation failure */
232            }
233            memcpy(new_array, cmdbuf->buffer_refs, sizeof(psb_buffer_p) * cmdbuf->buffer_refs_allocated);
234            free(cmdbuf->buffer_refs);
235            cmdbuf->buffer_refs_allocated = new_size;
236            cmdbuf->buffer_refs = new_array;
237        }
238        cmdbuf->buffer_refs[item_loc] = buf;
239        cmdbuf->buffer_refs_count++;
240        buf->status = psb_bs_queued;
241    }
242    return item_loc;
243}
244
245/* Creates a relocation record for a DWORD in the mapped "cmdbuf" at address
246 * "addr_in_cmdbuf"
247 * The relocation is based on the device virtual address of "ref_buffer"
248 * "buf_offset" is be added to the device virtual address, and the sum is then
249 * right shifted with "align_shift".
250 * "mask" determines which bits of the target DWORD will be updated with the so
251 * constructed address. The remaining bits will be filled with bits from "background".
252 */
253void lnc_cmdbuf_add_relocation(lnc_cmdbuf_p cmdbuf,
254                               uint32_t *addr_in_dst_buffer,/*addr of dst_buffer for the DWORD*/
255                               psb_buffer_p ref_buffer,
256                               uint32_t buf_offset,
257                               uint32_t mask,
258                               uint32_t background,
259                               uint32_t align_shift,
260                               uint32_t dst_buffer,
261                               uint32_t *start_of_dst_buffer) /*Index of the list refered by cmdbuf->buffer_refs */
262{
263    struct drm_psb_reloc *reloc = cmdbuf->reloc_idx;
264    uint64_t presumed_offset = wsbmBOOffsetHint(ref_buffer->drm_buf);
265
266    reloc->where = addr_in_dst_buffer - start_of_dst_buffer; /* Offset in DWORDs */
267
268    reloc->buffer = lnc_cmdbuf_buffer_ref(cmdbuf, ref_buffer);
269    ASSERT(reloc->buffer != -1);
270
271    reloc->reloc_op = PSB_RELOC_OP_OFFSET;
272#ifndef VA_EMULATOR
273    if (presumed_offset) {
274        uint32_t new_val =  presumed_offset + buf_offset;
275
276        new_val = ((new_val >> align_shift) << (align_shift << PSB_RELOC_ALSHIFT_SHIFT));
277        new_val = (background & ~mask) | (new_val & mask);
278        *addr_in_dst_buffer = new_val;
279    } else {
280        *addr_in_dst_buffer = PSB_RELOC_MAGIC;
281    }
282#else
283    /* indicate subscript of relocation buffer */
284    *addr_in_dst_buffer = reloc - (struct drm_psb_reloc *)cmdbuf->reloc_base;
285#endif
286    reloc->mask = mask;
287    reloc->shift = align_shift << PSB_RELOC_ALSHIFT_SHIFT;
288    reloc->pre_add = buf_offset;
289    reloc->background = background;
290    reloc->dst_buffer = dst_buffer;
291    cmdbuf->reloc_idx++;
292
293    ASSERT(((unsigned char *)(cmdbuf->reloc_idx)) < RELOC_END(cmdbuf));
294}
295
296/*
297 * Advances "obj_context" to the next cmdbuf
298 *
299 * Returns 0 on success
300 */
301int lnc_context_get_next_cmdbuf(object_context_p obj_context)
302{
303    lnc_cmdbuf_p cmdbuf;
304    int ret;
305
306    if (obj_context->lnc_cmdbuf) {
307        return 0;
308    }
309
310    obj_context->cmdbuf_current++;
311    if (obj_context->cmdbuf_current >= LNC_MAX_CMDBUFS_ENCODE) {
312        obj_context->cmdbuf_current = 0;
313    }
314
315    cmdbuf = obj_context->lnc_cmdbuf_list[obj_context->cmdbuf_current];
316    ret = lnc_cmdbuf_reset(cmdbuf);
317    if (!ret) {
318        /* Success */
319        obj_context->lnc_cmdbuf = cmdbuf;
320    }
321
322    /* added pic_params/slice_params into ref, so the index is 1/2 */
323    lnc_cmdbuf_buffer_ref(cmdbuf, &cmdbuf->pic_params);
324    lnc_cmdbuf_buffer_ref(cmdbuf, &cmdbuf->slice_params);
325
326    return ret;
327}
328
329/*
330 * This is the user-space do-it-all interface to the drm cmdbuf ioctl.
331 * It allows different buffers as command- and reloc buffer. A list of
332 * cliprects to apply and whether to copy the clipRect content to all
333 * scanout buffers (damage = 1).
334 */
335/*
336 * Don't add debug statements in this function, it gets called with the
337 * DRM lock held and output to an X terminal can cause X to deadlock
338 */
339static int
340lncDRMCmdBuf(int fd, int ioctl_offset, psb_buffer_p *buffer_list, int buffer_count, unsigned cmdBufHandle,
341             unsigned cmdBufOffset, unsigned cmdBufSize,
342             unsigned relocBufHandle, unsigned relocBufOffset,
343             unsigned numRelocs, int damage,
344             unsigned engine, unsigned fence_flags, struct psb_ttm_fence_rep *fence_rep)
345{
346    drm_psb_cmdbuf_arg_t ca;
347    struct psb_validate_arg *arg_list;
348    int i;
349    int ret;
350    uint64_t mask = PSB_GPU_ACCESS_MASK;
351
352    arg_list = (struct psb_validate_arg *) calloc(1, sizeof(struct psb_validate_arg) * buffer_count);
353    if (arg_list == NULL) {
354        drv_debug_msg(VIDEO_DEBUG_ERROR, "Allocate memory failed\n");
355        return -ENOMEM;
356    }
357
358    for (i = 0; i < buffer_count; i++) {
359        struct psb_validate_arg *arg = &(arg_list[i]);
360        struct psb_validate_req *req = &arg->d.req;
361
362        req->next = (unsigned long) & (arg_list[i+1]);
363
364        req->buffer_handle = wsbmKBufHandle(wsbmKBuf(buffer_list[i]->drm_buf));
365        req->group = 0;
366        req->set_flags = (PSB_GPU_ACCESS_READ | PSB_GPU_ACCESS_WRITE) & mask;
367        req->clear_flags = (~(PSB_GPU_ACCESS_READ | PSB_GPU_ACCESS_WRITE)) & mask;
368#if 1
369        req->presumed_gpu_offset = (uint64_t)wsbmBOOffsetHint(buffer_list[i]->drm_buf);
370        req->presumed_flags = PSB_USE_PRESUMED;
371        if ((req->presumed_gpu_offset >> 28) & 0x1) {
372            drv_debug_msg(VIDEO_DEBUG_ERROR, "buffer is at the address topaz can not access\n");
373            ret = -1;
374            goto out;
375        }
376#else
377        req->presumed_flags = 0;
378#endif
379        req->pad64 = (uint32_t)buffer_list[i]->pl_flags;
380    }
381    arg_list[buffer_count-1].d.req.next = 0;
382
383    ca.buffer_list = (uint64_t)((unsigned long)arg_list);
384    ca.cmdbuf_handle = cmdBufHandle;
385    ca.cmdbuf_offset = cmdBufOffset;
386    ca.cmdbuf_size = cmdBufSize;
387    ca.reloc_handle = relocBufHandle;
388    ca.reloc_offset = relocBufOffset;
389    ca.num_relocs = numRelocs;
390    ca.engine = engine;
391    ca.fence_flags = fence_flags;
392    ca.fence_arg = (uint64_t)((unsigned long)fence_rep);
393
394    do {
395        ret = drmCommandWrite(fd, ioctl_offset, &ca, sizeof(ca));
396    } while (ret == EAGAIN);
397
398    if (ret)
399        goto out;
400
401    for (i = 0; i < buffer_count; i++) {
402        struct psb_validate_arg *arg = &(arg_list[i]);
403        struct psb_validate_rep *rep = &arg->d.rep;
404
405        if (!arg->handled) {
406            ret = -EFAULT;
407            goto out;
408        }
409        if (arg->ret != 0) {
410            ret = arg->ret;
411            goto out;
412        }
413        wsbmUpdateKBuf(wsbmKBuf(buffer_list[i]->drm_buf),
414                       rep->gpu_offset, rep->placement, rep->fence_type_mask);
415    }
416out:
417    free(arg_list);
418    for (i = 0; i < buffer_count; i++) {
419        /*
420         * Buffer no longer queued in userspace
421         */
422        switch (buffer_list[i]->status) {
423        case psb_bs_queued:
424            buffer_list[i]->status = psb_bs_ready;
425            break;
426
427        case psb_bs_abandoned:
428            psb_buffer_destroy(buffer_list[i]);
429            free(buffer_list[i]);
430            break;
431
432        default:
433            /* Not supposed to happen */
434            ASSERT(0);
435        }
436    }
437
438    return ret;
439}
440
/*
 * Disabled helper (compiled out by "#if 0").
 *
 * Builds a wsbm fence object from the fence reply of a submission and calls
 * wsbmFenceFinish on it, storing that call's result in "*status".
 * Returns the fence object, or NULL when the reply carries an error
 * (or when wsbmFenceCreate itself returned NULL).
 */
#if 0
static struct _WsbmFenceObject *
lnc_fence_wait(psb_driver_data_p driver_data,
               struct psb_ttm_fence_rep *fence_rep, int *status)

{
    struct _WsbmFenceObject *fence = NULL;
    int ret = -1; /* not read by the code below */

    /* copy fence information */
    if (fence_rep->error != 0) {
        drv_debug_msg(VIDEO_DEBUG_ERROR, "drm failed to create a fence"
                           " and has idled the HW\n");
        DEBUG_FAILURE_RET;
        return NULL;
    }

    fence = wsbmFenceCreate(driver_data->fence_mgr, fence_rep->fence_class,
                            fence_rep->fence_type,
                            (unsigned char *)fence_rep->handle,
                            0);
    /* "*status" is only written when a fence object was obtained */
    if (fence)
        *status = wsbmFenceFinish(fence, fence_rep->fence_type, 0);

    return fence;
}
#endif
468
469/*
470 * Submits the current cmdbuf
471 *
472 * Returns 0 on success
473 */
474int lnc_context_submit_cmdbuf(object_context_p obj_context)
475{
476
477    return 0;
478}
479
480/*
481 * FrameSkip is only meaningful for RC enabled mode
482 * Topaz raises this flag after surface N encoding is finished (vaSyncSurface gets back)
483 * then for the next encode surface N+1 (ctx->src_surface) frameskip flag is cleared in vaBeginPicuture
484 * and is always set in vaEndPicture:lnc_PatchRCMode
485 * vaQuerySurfaceStatus is supposed only to be called after vaEndPicture/vaSyncSurface,
486 * The caller should ensure the surface pertains to an encode context
487 */
488int lnc_surface_get_frameskip(psb_driver_data_p driver_data, psb_surface_p surface, int *frame_skip)
489{
490    struct drm_lnc_video_getparam_arg arg;
491    unsigned long temp;
492    int ret = 0;
493
494    /* bit31 indicate if frameskip is already settled, it is used to record the frame skip flag for old surfaces
495     * because current FRAMESKIP in hardware can't be applied to the old surfaces
496     * bit31 is cleared when the surface is used as encode render target or reference/reconstrucure target
497     */
498    if (GET_SURFACE_INFO_skipped_flag(surface) & SURFACE_INFO_SKIP_FLAG_SETTLED) {
499        *frame_skip = GET_SURFACE_INFO_skipped_flag(surface) & 1;
500        return 0;
501    }
502
503    /* not settled, we get it from current HW FRAMESKIP flag */
504    arg.key = LNC_VIDEO_FRAME_SKIP;
505    arg.value = (uint64_t)((unsigned long) & temp);
506    ret = drmCommandWriteRead(driver_data->drm_fd, driver_data->getParamIoctlOffset,
507                              &arg, sizeof(arg));
508    if (ret == 0) {
509        SET_SURFACE_INFO_skipped_flag(surface, temp);
510        *frame_skip = temp;
511        if (temp == 1)
512            drv_debug_msg(VIDEO_DEBUG_GENERAL, "Detected a skipped frame for encode\n");
513    }
514
515    return ret;
516}
517
518
519/*
520 * Flushes all cmdbufs
521 */
522int lnc_context_flush_cmdbuf(object_context_p obj_context)
523{
524    lnc_cmdbuf_p cmdbuf = obj_context->lnc_cmdbuf;
525    psb_driver_data_p driver_data = obj_context->driver_data;
526    unsigned int fence_flags;
527    struct psb_ttm_fence_rep fence_rep;
528    unsigned int reloc_offset;
529    unsigned int num_relocs;
530    int ret;
531    unsigned int cmdbuffer_size = (unsigned char *) cmdbuf->cmd_idx - cmdbuf->cmd_start; /* In bytes */
532
533    ASSERT(cmdbuffer_size < CMD_SIZE);
534    ASSERT((unsigned char *) cmdbuf->cmd_idx < CMD_END(cmdbuf));
535    /* LOCK */
536    ret = LOCK_HARDWARE(driver_data);
537    if (ret) {
538        UNLOCK_HARDWARE(driver_data);
539        DEBUG_FAILURE_RET;
540        return ret;
541    }
542
543    /* Now calculate the total number of relocations */
544    reloc_offset = cmdbuf->reloc_base - cmdbuf->cmd_base;
545    num_relocs = (((unsigned char *) cmdbuf->reloc_idx) - cmdbuf->reloc_base) / sizeof(struct drm_psb_reloc);
546
547    lnc_cmdbuf_unmap(cmdbuf);
548
549    ASSERT(NULL == cmdbuf->reloc_base);
550
551    if (psb_video_trace_fp)
552        fence_flags = 0;
553    else
554        fence_flags = DRM_PSB_FENCE_NO_USER;
555
556#ifndef LNC_ENGINE_ENCODE
557#define LNC_ENGINE_ENCODE  5
558#endif
559
560    wsbmWriteLockKernelBO();
561    ret = lncDRMCmdBuf(driver_data->drm_fd, driver_data->execIoctlOffset,
562                       cmdbuf->buffer_refs, cmdbuf->buffer_refs_count, wsbmKBufHandle(wsbmKBuf(cmdbuf->buf.drm_buf)),
563                       0, cmdbuffer_size,/*unsigned cmdBufSize*/
564                       wsbmKBufHandle(wsbmKBuf(cmdbuf->buf.drm_buf)), reloc_offset, num_relocs,
565                       0, LNC_ENGINE_ENCODE, fence_flags, &fence_rep);
566    wsbmWriteUnlockKernelBO();
567    UNLOCK_HARDWARE(driver_data);
568
569    if (ret) {
570        obj_context->lnc_cmdbuf = NULL;
571
572        DEBUG_FAILURE_RET;
573        return ret;
574    }
575
576#if 0
577    int status = -1;
578    struct _WsbmFenceObject *fence = NULL;
579
580    fence = lnc_fence_wait(driver_data, &fence_rep, &status);
581    drv_debug_msg(VIDEO_DEBUG_GENERAL, "psb_fence_wait returns: %d (fence=0x%08x)\n", status, fence);
582
583    if (fence)
584        wsbmFenceUnreference(fence);
585#endif
586
587    obj_context->lnc_cmdbuf = NULL;
588
589    return 0;
590}
591
592