ih264e_mc.c revision 3749f6f435e79624f72841e866245d84195551cd
1/****************************************************************************** 2 * 3 * Copyright (C) 2015 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19*/ 20 21/** 22 ******************************************************************************* 23 * @file 24 * ih264e_mc.c 25 * 26 * @brief 27 * Contains definition of functions for motion compensation 28 * 29 * @author 30 * ittiam 31 * 32 * @par List of Functions: 33 * - ih264e_motion_comp_luma() 34 * - ih264e_motion_comp_chroma() 35 * 36 * @remarks 37 * None 38 * 39 ******************************************************************************* 40 */ 41 42/*****************************************************************************/ 43/* File Includes */ 44/*****************************************************************************/ 45 46/* System include files */ 47#include <stdio.h> 48 49/* User include files */ 50#include "ih264_typedefs.h" 51#include "ih264_defs.h" 52#include "iv2.h" 53#include "ive2.h" 54#include "ime_distortion_metrics.h" 55#include "ime_defs.h" 56#include "ime_structs.h" 57#include "ih264_structs.h" 58#include "ih264_inter_pred_filters.h" 59#include "ih264_mem_fns.h" 60#include "ih264_padding.h" 61#include "ih264_intra_pred_filters.h" 62#include "ih264_deblk_edge_filters.h" 63#include "ih264_trans_quant_itrans_iquant.h" 64#include "ih264_cabac_tables.h" 65#include "ih264e_defs.h" 66#include "ih264e_error.h" 67#include "ih264e_bitstream.h" 68#include "irc_cntrl_param.h" 69#include "irc_frame_info_collector.h" 70#include "ih264e_rate_control.h" 71#include "ih264e_cabac_structs.h" 72#include "ih264e_structs.h" 73#include "ih264e_mc.h" 74#include "ih264e_half_pel.h" 75 76/*****************************************************************************/ 77/* Function Definitions */ 78/*****************************************************************************/ 79 80/** 81 ****************************************************************************** 82 * 83 * @brief 84 * performs motion compensation for a luma mb for the given mv. 85 * 86 * @par Description 87 * This routine performs motion compensation of an inter mb. When the inter 88 * mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer 89 * to pred buffer. In this case the function returns pointer and stride of the 90 * ref. buffer and this info is used in place of pred buffer else where. 91 * In other cases, the pred buffer is populated via copy / filtering + copy 92 * (q pel cases) and returned. 93 * 94 * @param[in] ps_proc 95 * pointer to current proc ctxt 96 * 97 * @param[out] pu1_pseudo_pred 98 * pseudo prediction buffer 99 * 100 * @param[out] u4_pseudo_pred_strd 101 * pseudo pred buffer stride 102 * 103 * @return none 104 * 105 * @remarks Assumes half pel buffers for the entire frame are populated. 106 * 107 ****************************************************************************** 108 */ 109void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred, 110 WORD32 *pi4_pseudo_pred_strd) 111{ 112 /* codec context */ 113 codec_t *ps_codec = ps_proc->ps_codec; 114 115 /* me ctxt */ 116 me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt; 117 118 /* Pointer to the structure having motion vectors, size and position of curr partitions */ 119 enc_pu_t *ps_curr_pu; 120 121 /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */ 122 UWORD8 *pu1_ref[4]; 123 124 /* pred buffer ptr */ 125 UWORD8 *pu1_pred; 126 127 /* strides of full pel, half pel x, half pel y, half pel xy reference buffer */ 128 WORD32 i4_ref_strd[4]; 129 130 /* pred buffer stride */ 131 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 132 133 /* full pel motion vectors */ 134 WORD32 u4_mv_x_full, u4_mv_y_full; 135 136 /* half pel motion vectors */ 137 WORD32 u4_mv_x_hpel, u4_mv_y_hpel; 138 139 /* quarter pel motion vectors */ 140 WORD32 u4_mv_x_qpel, u4_mv_y_qpel; 141 142 /* width & height of the partition */ 143 UWORD32 wd, ht; 144 145 /* partition idx */ 146 UWORD32 u4_num_prtn; 147 148 /* half / qpel coefficient */ 149 UWORD32 u4_subpel_factor; 150 151 /* BIPRED Flag */ 152 WORD32 i4_bipred_flag; 153 154 /* temp var */ 155 UWORD32 u4_lkup_idx1; 156 157 /* Init */ 158 i4_ref_strd[0] = ps_proc->i4_rec_strd; 159 160 i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] = 161 ps_me_ctxt->u4_subpel_buf_strd; 162 163 for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; 164 u4_num_prtn++) 165 { 166 mv_t *ps_curr_mv; 167 168 /* update ptr to curr partition */ 169 ps_curr_pu = ps_proc->ps_pu + u4_num_prtn; 170 171 /* Set no no bipred */ 172 i4_bipred_flag = 0; 173 174 switch (ps_curr_pu->b2_pred_mode) 175 { 176 case PRED_L0: 177 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; 178 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; 179 break; 180 181 case PRED_L1: 182 ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv; 183 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1]; 184 break; 185 186 case PRED_BI: 187 /* 188 * In case of PRED_BI, we only need to ensure that 189 * the reference buffer that gets selected is 190 * ps_proc->pu1_best_subpel_buf 191 */ 192 193 /* Dummy */ 194 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; 195 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; 196 197 i4_bipred_flag = 1; 198 break; 199 200 default: 201 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv; 202 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0]; 203 break; 204 205 } 206 207 /* get full pel mv's (full pel units) */ 208 u4_mv_x_full = ps_curr_mv->i2_mvx >> 2; 209 u4_mv_y_full = ps_curr_mv->i2_mvy >> 2; 210 211 /* get half pel mv's */ 212 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; 213 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; 214 215 /* get quarter pel mv's */ 216 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); 217 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); 218 219 /* width and height of partition */ 220 wd = (ps_curr_pu->b4_wd + 1) << 2; 221 ht = (ps_curr_pu->b4_ht + 1) << 2; 222 223 /* decision ? qpel/hpel, fpel */ 224 u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2) 225 + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel); 226 227 /* Move ref to position given by MV */ 228 pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full); 229 230 /* Sub pel ptrs/ Biperd pointers init */ 231 pu1_ref[1] = ps_proc->pu1_best_subpel_buf; 232 i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd; 233 234 /* update pred buff ptr */ 235 pu1_pred = ps_proc->pu1_pred_mb 236 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd 237 + 4 * ps_curr_pu->b4_pos_x; 238 239 /* u4_lkup_idx1 will be non zero for half pel and bipred */ 240 u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag; 241 242 { 243 /********************************************************************/ 244 /* if the block is P16x16 MB and mv are not quarter pel motion */ 245 /* vectors, there is no need to copy 16x16 unit from reference frame*/ 246 /* to pred buffer. We might as well send the reference frame buffer */ 247 /* pointer as pred buffer (ofc with updated stride) to fwd transform*/ 248 /* and inverse transform unit. */ 249 /********************************************************************/ 250 if (ps_proc->u4_num_sub_partitions == 1) 251 { 252 *pu1_pseudo_pred = pu1_ref[u4_lkup_idx1]; 253 *pi4_pseudo_pred_strd = i4_ref_strd[u4_lkup_idx1]; 254 255 } 256 /* 257 * Copying half pel or full pel to prediction buffer 258 * Currently ps_proc->u4_num_sub_partitions will always be 1 as we only support 16x16 in P mbs 259 */ 260 else 261 { 262 ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1], 263 pu1_pred, 264 i4_ref_strd[u4_lkup_idx1], 265 i4_pred_strd, ht, wd, NULL, 266 0); 267 } 268 269 } 270 } 271} 272 273/** 274 ****************************************************************************** 275 * 276 * @brief 277 * performs motion compensation for chroma mb 278 * 279 * @par Description 280 * Copies a MB of data from the reference buffer (Full pel, half pel or q pel) 281 * according to the motion vectors given 282 * 283 * @param[in] ps_proc 284 * pointer to current proc ctxt 285 * 286 * @return none 287 * 288 * @remarks Assumes half pel and quarter pel buffers for the entire frame are 289 * populated. 290 ****************************************************************************** 291 */ 292void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc) 293{ 294 /* codec context */ 295 codec_t *ps_codec = ps_proc->ps_codec; 296 297 /* Pointer to the structure having motion vectors, size and position of curr partitions */ 298 enc_pu_t *ps_curr_pu; 299 300 /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */ 301 UWORD8 *pu1_ref; 302 303 /* pred buffer ptr */ 304 UWORD8 *pu1_pred; 305 306 /* strides of full pel reference buffer */ 307 WORD32 i4_ref_strd = ps_proc->i4_rec_strd; 308 309 /* pred buffer stride */ 310 WORD32 i4_pred_strd = ps_proc->i4_pred_strd; 311 312 /* full pel motion vectors */ 313 WORD32 u4_mv_x_full, u4_mv_y_full; 314 315 /* half pel motion vectors */ 316 WORD32 u4_mv_x_hpel, u4_mv_y_hpel; 317 318 /* quarter pel motion vectors */ 319 WORD32 u4_mv_x_qpel, u4_mv_y_qpel; 320 321 /* width & height of the partition */ 322 UWORD32 wd, ht; 323 324 /* partition idx */ 325 UWORD32 u4_num_prtn; 326 327 WORD32 u4_mv_x; 328 WORD32 u4_mv_y; 329 UWORD8 u1_dx, u1_dy; 330 331 for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions; 332 u4_num_prtn++) 333 { 334 mv_t *ps_curr_mv; 335 336 ps_curr_pu = ps_proc->ps_pu + u4_num_prtn; 337 338 if (ps_curr_pu->b2_pred_mode != PRED_BI) 339 { 340 ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv; 341 pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode]; 342 343 u4_mv_x = ps_curr_mv->i2_mvx >> 3; 344 u4_mv_y = ps_curr_mv->i2_mvy >> 3; 345 346 /* corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed wiith dx, dy =4 */ 347 u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2; 348 u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2; 349 350 /* get half pel mv's */ 351 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; 352 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; 353 354 /* get quarter pel mv's */ 355 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); 356 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); 357 358 /* width and height of sub macro block */ 359 wd = (ps_curr_pu->b4_wd + 1) << 1; 360 ht = (ps_curr_pu->b4_ht + 1) << 1; 361 362 /* move the pointers so that they point to the motion compensated locations */ 363 pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1)); 364 365 pu1_pred = ps_proc->pu1_pred_mb 366 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd 367 + 2 * ps_curr_pu->b4_pos_x; 368 369 u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel); 370 u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel); 371 372 /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with 373 * separate functions for better performance 374 * 375 * ih264_inter_pred_chroma_dx_zero_a9q 376 * and 377 * ih264_inter_pred_chroma_dy_zero_a9q 378 */ 379 380 ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd, 381 i4_pred_strd, u1_dx, u1_dy, ht, wd); 382 } 383 else /* If the pred mode is PRED_BI */ 384 { 385 /* 386 * We need to interpolate the L0 and L1 ref pics with the chorma MV 387 * then use them to average for bilinrar interpred 388 */ 389 WORD32 i4_predmode; 390 UWORD8 *pu1_ref_buf[2]; 391 392 /* Temporary buffers to store the interpolated value from L0 and L1 */ 393 pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0]; 394 pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1]; 395 396 397 for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++) 398 { 399 ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv; 400 pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode]; 401 402 u4_mv_x = ps_curr_mv->i2_mvx >> 3; 403 u4_mv_y = ps_curr_mv->i2_mvy >> 3; 404 405 /* 406 * corresponds to full pel motion vector in luma, but in chroma 407 * corresponds to pel formed wiith dx, dy =4 408 */ 409 u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2; 410 u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2; 411 412 /* get half pel mv's */ 413 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1; 414 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1; 415 416 /* get quarter pel mv's */ 417 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1); 418 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1); 419 420 /* width and height of sub macro block */ 421 wd = (ps_curr_pu->b4_wd + 1) << 1; 422 ht = (ps_curr_pu->b4_ht + 1) << 1; 423 424 /* move the pointers so that they point to the motion compensated locations */ 425 pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1)); 426 427 pu1_pred = ps_proc->pu1_pred_mb 428 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd 429 + 2 * ps_curr_pu->b4_pos_x; 430 431 u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) 432 + (u4_mv_x_qpel); 433 u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) 434 + (u4_mv_y_qpel); 435 436 ps_codec->pf_inter_pred_chroma(pu1_ref, 437 pu1_ref_buf[i4_predmode], 438 i4_ref_strd, MB_SIZE, u1_dx, 439 u1_dy, ht, wd); 440 } 441 442 ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0], 443 pu1_ref_buf[PRED_L1], pu1_pred, 444 MB_SIZE, MB_SIZE, 445 i4_pred_strd, MB_SIZE >> 1, 446 MB_SIZE); 447 } 448 } 449} 450