/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
101b362b15af34006e6a11974088a46d42b903418eJohann
111b362b15af34006e6a11974088a46d42b903418eJohann
121b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_config.h"
13ba164dffc5a6795bce97fae02b51ccf3330e15e4hkuang#include "vp8_rtcd.h"
141b362b15af34006e6a11974088a46d42b903418eJohann#include "vpx_ports/x86.h"
151b362b15af34006e6a11974088a46d42b903418eJohann#include "vp8/encoder/block.h"
161b362b15af34006e6a11974088a46d42b903418eJohann
/* 4x4 routine defined outside this file. */
void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch);

/*
 * Runs the 4x4 MMX routine twice to cover two adjacent 4-wide pieces.
 *
 *   input  - source samples; the second call starts 4 shorts further in.
 *   output - destination; the second call writes 16 shorts further in.
 *   pitch  - forwarded unchanged to both calls.
 */
void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
{
    short *const second_in  = input + 4;
    short *const second_out = output + 16;

    vp8_short_fdct4x4_mmx(input, output, pitch);
    vp8_short_fdct4x4_mmx(second_in, second_out, pitch);
}
231b362b15af34006e6a11974088a46d42b903418eJohann
241b362b15af34006e6a11974088a46d42b903418eJohannint vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
251b362b15af34006e6a11974088a46d42b903418eJohann                                 short *qcoeff_ptr, short *dequant_ptr,
261b362b15af34006e6a11974088a46d42b903418eJohann                                 const short *scan_mask, short *round_ptr,
271b362b15af34006e6a11974088a46d42b903418eJohann                                 short *quant_ptr, short *dqcoeff_ptr);
281b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
291b362b15af34006e6a11974088a46d42b903418eJohann{
301b362b15af34006e6a11974088a46d42b903418eJohann    const short *scan_mask   = vp8_default_zig_zag_mask;
311b362b15af34006e6a11974088a46d42b903418eJohann    short *coeff_ptr   = b->coeff;
321b362b15af34006e6a11974088a46d42b903418eJohann    short *zbin_ptr    = b->zbin;
331b362b15af34006e6a11974088a46d42b903418eJohann    short *round_ptr   = b->round;
341b362b15af34006e6a11974088a46d42b903418eJohann    short *quant_ptr   = b->quant_fast;
351b362b15af34006e6a11974088a46d42b903418eJohann    short *qcoeff_ptr  = d->qcoeff;
361b362b15af34006e6a11974088a46d42b903418eJohann    short *dqcoeff_ptr = d->dqcoeff;
371b362b15af34006e6a11974088a46d42b903418eJohann    short *dequant_ptr = d->dequant;
381b362b15af34006e6a11974088a46d42b903418eJohann
391b362b15af34006e6a11974088a46d42b903418eJohann    *d->eob = (char)vp8_fast_quantize_b_impl_mmx(
401b362b15af34006e6a11974088a46d42b903418eJohann                                                 coeff_ptr,
411b362b15af34006e6a11974088a46d42b903418eJohann                                                 zbin_ptr,
421b362b15af34006e6a11974088a46d42b903418eJohann                                                 qcoeff_ptr,
431b362b15af34006e6a11974088a46d42b903418eJohann                                                 dequant_ptr,
441b362b15af34006e6a11974088a46d42b903418eJohann                                                 scan_mask,
451b362b15af34006e6a11974088a46d42b903418eJohann
461b362b15af34006e6a11974088a46d42b903418eJohann                                                 round_ptr,
471b362b15af34006e6a11974088a46d42b903418eJohann                                                 quant_ptr,
481b362b15af34006e6a11974088a46d42b903418eJohann                                                 dqcoeff_ptr
491b362b15af34006e6a11974088a46d42b903418eJohann                                                 );
501b362b15af34006e6a11974088a46d42b903418eJohann}
511b362b15af34006e6a11974088a46d42b903418eJohann
521b362b15af34006e6a11974088a46d42b903418eJohannint vp8_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
531b362b15af34006e6a11974088a46d42b903418eJohannint vp8_mbblock_error_mmx(MACROBLOCK *mb, int dc)
541b362b15af34006e6a11974088a46d42b903418eJohann{
551b362b15af34006e6a11974088a46d42b903418eJohann    short *coeff_ptr =  mb->block[0].coeff;
561b362b15af34006e6a11974088a46d42b903418eJohann    short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff;
571b362b15af34006e6a11974088a46d42b903418eJohann    return vp8_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr, dc);
581b362b15af34006e6a11974088a46d42b903418eJohann}
591b362b15af34006e6a11974088a46d42b903418eJohann
601b362b15af34006e6a11974088a46d42b903418eJohannint vp8_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
611b362b15af34006e6a11974088a46d42b903418eJohannint vp8_mbuverror_mmx(MACROBLOCK *mb)
621b362b15af34006e6a11974088a46d42b903418eJohann{
631b362b15af34006e6a11974088a46d42b903418eJohann    short *s_ptr = &mb->coeff[256];
641b362b15af34006e6a11974088a46d42b903418eJohann    short *d_ptr = &mb->e_mbd.dqcoeff[256];
651b362b15af34006e6a11974088a46d42b903418eJohann    return vp8_mbuverror_mmx_impl(s_ptr, d_ptr);
661b362b15af34006e6a11974088a46d42b903418eJohann}
671b362b15af34006e6a11974088a46d42b903418eJohann
681b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
691b362b15af34006e6a11974088a46d42b903418eJohann                             short *diff, unsigned char *predictor,
701b362b15af34006e6a11974088a46d42b903418eJohann                             int pitch);
711b362b15af34006e6a11974088a46d42b903418eJohannvoid vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
721b362b15af34006e6a11974088a46d42b903418eJohann{
731b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *z = *(be->base_src) + be->src;
741b362b15af34006e6a11974088a46d42b903418eJohann    unsigned int  src_stride = be->src_stride;
751b362b15af34006e6a11974088a46d42b903418eJohann    short *diff = &be->src_diff[0];
761b362b15af34006e6a11974088a46d42b903418eJohann    unsigned char *predictor = &bd->predictor[0];
771b362b15af34006e6a11974088a46d42b903418eJohann    vp8_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch);
781b362b15af34006e6a11974088a46d42b903418eJohann}
79