/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org
11d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org#include <arm_neon.h>
12d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org
13d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org#include "vp8/common/blockd.h"
14d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org
/*
 * Builds the 16x16 luma intra predictor for one macroblock using NEON.
 *
 * The prediction mode is read from x->mode_info_context->mbmi.mode:
 *   DC_PRED  every output byte is the rounded mean of the available edge
 *            samples, or 128 when up_available + left_available is zero.
 *   V_PRED   every output row is a copy of the 16 bytes at yabove_row.
 *   H_PRED   output row i is filled with the i-th left sample.
 *   TM_PRED  output[i][j] = clamp(above[j] + left[i] - yabove_row[-1]),
 *            saturated to 0..255.
 * Any other mode value writes nothing.
 *
 * x            macroblock descriptor (mode and edge availability flags).
 * yabove_row   16 bytes are loaded from here; TM_PRED also reads
 *              yabove_row[-1].
 * yleft        first left sample; the following 15 are left_stride bytes
 *              apart.
 * left_stride  distance in bytes between consecutive left samples.
 * ypred_ptr    destination; 16 rows of 16 bytes are stored.
 * y_stride     distance in bytes between consecutive destination rows.
 */
void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x,
                                           unsigned char * yabove_row,
                                           unsigned char * yleft,
                                           int left_stride,
                                           unsigned char * ypred_ptr,
                                           int y_stride) {
  const int mode = x->mode_info_context->mbmi.mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      int shift = x->up_available + x->left_available;
      /* Value used when no edge contributes to the sum. */
      uint8x16_t v_expected_dc = vdupq_n_u8(128);

      if (shift) {
        unsigned int sum = 0;
        int expected_dc;

        if (x->up_available) {
          /* Horizontal add: widen pairwise 8 -> 16 -> 32 -> 64 bits, then
           * add the two remaining partial sums. The total is at most
           * 16 * 255, so reading a single 32-bit lane loses nothing. */
          const uint8x16_t v_above = vld1q_u8(yabove_row);
          const uint16x8_t sum16 = vpaddlq_u8(v_above);
          const uint32x4_t sum32 = vpaddlq_u16(sum16);
          const uint64x2_t sum64 = vpaddlq_u32(sum32);
          const uint32x2_t total =
              vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum64)),
                       vreinterpret_u32_u64(vget_high_u64(sum64)));
          sum = vget_lane_u32(total, 0);
        }
        if (x->left_available) {
          for (i = 0; i < 16; ++i) {
            sum += yleft[0];
            yleft += left_stride;
          }
        }
        /* 16 samples per contributing edge: divide by 2^(3 + shift) with
         * round-to-nearest (assumes each availability flag is 0 or 1). */
        shift += 3;
        expected_dc = (sum + (1 << (shift - 1))) >> shift;
        v_expected_dc = vdupq_n_u8((uint8_t)expected_dc);
      }
      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_expected_dc);
        ypred_ptr += y_stride;
      }
    }
    break;
    case V_PRED:
    {
      const uint8x16_t v_above = vld1q_u8(yabove_row);
      for (i = 0; i < 16; ++i) {
        vst1q_u8(ypred_ptr, v_above);
        ypred_ptr += y_stride;
      }
    }
    break;
    case H_PRED:
    {
      for (i = 0; i < 16; ++i) {
        const uint8x16_t v_yleft = vdupq_n_u8(yleft[0]);
        yleft += left_stride;
        vst1q_u8(ypred_ptr, v_yleft);
        ypred_ptr += y_stride;
      }
    }
    break;
    case TM_PRED:
    {
      /* Pixels are unsigned; broadcast them without detouring through a
       * signed type so values above 127 need no implementation-defined
       * narrowing conversion. */
      const uint16x8_t v_ytop_left = vdupq_n_u16(yabove_row[-1]);
      const uint8x16_t v_above = vld1q_u8(yabove_row);
      for (i = 0; i < 16; ++i) {
        const uint8x8_t v_yleft = vdup_n_u8(yleft[0]);
        /* above + left, widened to 16 bits (at most 510). */
        const uint16x8_t a_lo = vaddl_u8(vget_low_u8(v_above), v_yleft);
        const uint16x8_t a_hi = vaddl_u8(vget_high_u8(v_above), v_yleft);
        /* Subtract the top-left sample in signed arithmetic; the result
         * lies in -255..510. */
        const int16x8_t b_lo = vsubq_s16(vreinterpretq_s16_u16(a_lo),
                                         vreinterpretq_s16_u16(v_ytop_left));
        const int16x8_t b_hi = vsubq_s16(vreinterpretq_s16_u16(a_hi),
                                         vreinterpretq_s16_u16(v_ytop_left));
        /* Saturating narrow clamps to 0..255. */
        const uint8x8_t pred_lo = vqmovun_s16(b_lo);
        const uint8x8_t pred_hi = vqmovun_s16(b_hi);

        vst1q_u8(ypred_ptr, vcombine_u8(pred_lo, pred_hi));
        ypred_ptr += y_stride;
        yleft += left_stride;
      }
    }
    break;
    default:
      /* No whole-macroblock predictor is built here for other modes. */
      break;
  }
}
100d95585fb0ec024f6abd96f7b02e0df58019d46afjohannkoenig@chromium.org
/*
 * Builds the two 8x8 chroma (U and V) intra predictors for one macroblock
 * using NEON.
 *
 * The prediction mode is read from x->mode_info_context->mbmi.uv_mode:
 *   DC_PRED  each plane is filled with the rounded mean of its available
 *            edge samples, or 128 when up_available + left_available is
 *            zero.
 *   V_PRED   every output row is a copy of the 8 bytes above the plane.
 *   H_PRED   output row i is filled with the plane's i-th left sample.
 *   TM_PRED  output[i][j] = clamp(above[j] + left[i] - above_row[-1]),
 *            saturated to 0..255, computed per plane.
 * Any other mode value writes nothing.
 *
 * x            macroblock descriptor (mode and edge availability flags).
 * uabove_row   8 bytes are loaded from here; TM_PRED also reads
 *              uabove_row[-1].
 * vabove_row   same as uabove_row, for the V plane.
 * uleft        first U left sample; the following 7 are left_stride bytes
 *              apart.
 * vleft        same as uleft, for the V plane.
 * left_stride  distance in bytes between consecutive left samples.
 * upred_ptr    U destination; 8 rows of 8 bytes are stored.
 * vpred_ptr    V destination; 8 rows of 8 bytes are stored.
 * pred_stride  distance in bytes between consecutive destination rows.
 */
void vp8_build_intra_predictors_mbuv_s_neon(MACROBLOCKD *x,
                                            unsigned char * uabove_row,
                                            unsigned char * vabove_row,
                                            unsigned char * uleft,
                                            unsigned char * vleft,
                                            int left_stride,
                                            unsigned char * upred_ptr,
                                            unsigned char * vpred_ptr,
                                            int pred_stride) {
  const int mode = x->mode_info_context->mbmi.uv_mode;
  int i;

  switch (mode) {
    case DC_PRED:
    {
      int shift = x->up_available + x->left_available;
      /* Values used when no edge contributes to the sums. */
      uint8x8_t v_expected_udc = vdup_n_u8(128);
      uint8x8_t v_expected_vdc = vdup_n_u8(128);

      if (shift) {
        unsigned int sum_u = 0;
        unsigned int sum_v = 0;
        int expected_udc;
        int expected_vdc;

        if (x->up_available) {
          /* Pack U into the low half and V into the high half so a single
           * pairwise-widening chain sums both rows; the U total ends up in
           * the first 64-bit element and the V total in the second. */
          const uint8x8_t v_uabove = vld1_u8(uabove_row);
          const uint8x8_t v_vabove = vld1_u8(vabove_row);
          const uint16x8_t sum16 =
              vpaddlq_u8(vcombine_u8(v_uabove, v_vabove));
          const uint32x4_t sum32 = vpaddlq_u16(sum16);
          const uint64x2_t sum64 = vpaddlq_u32(sum32);
          /* Each total is at most 8 * 255, so one 32-bit lane per plane
           * is enough. */
          sum_u = vgetq_lane_u32(vreinterpretq_u32_u64(sum64), 0);
          sum_v = vgetq_lane_u32(vreinterpretq_u32_u64(sum64), 2);
        }
        if (x->left_available) {
          for (i = 0; i < 8; ++i) {
            sum_u += uleft[0];
            uleft += left_stride;
            sum_v += vleft[0];
            vleft += left_stride;
          }
        }
        /* 8 samples per contributing edge: divide by 2^(2 + shift) with
         * round-to-nearest (assumes each availability flag is 0 or 1). */
        shift += 2;
        expected_udc = (sum_u + (1 << (shift - 1))) >> shift;
        expected_vdc = (sum_v + (1 << (shift - 1))) >> shift;
        v_expected_udc = vdup_n_u8((uint8_t)expected_udc);
        v_expected_vdc = vdup_n_u8((uint8_t)expected_vdc);
      }
      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_expected_udc);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_expected_vdc);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case V_PRED:
    {
      const uint8x8_t v_uabove = vld1_u8(uabove_row);
      const uint8x8_t v_vabove = vld1_u8(vabove_row);
      for (i = 0; i < 8; ++i) {
        vst1_u8(upred_ptr, v_uabove);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_vabove);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case H_PRED:
    {
      for (i = 0; i < 8; ++i) {
        const uint8x8_t v_uleft = vdup_n_u8(uleft[0]);
        const uint8x8_t v_vleft = vdup_n_u8(vleft[0]);
        uleft += left_stride;
        vleft += left_stride;
        vst1_u8(upred_ptr, v_uleft);
        upred_ptr += pred_stride;
        vst1_u8(vpred_ptr, v_vleft);
        vpred_ptr += pred_stride;
      }
    }
    break;
    case TM_PRED:
    {
      /* Pixels are unsigned; broadcast them without detouring through a
       * signed type so values above 127 need no implementation-defined
       * narrowing conversion. */
      const uint16x8_t v_utop_left = vdupq_n_u16(uabove_row[-1]);
      const uint16x8_t v_vtop_left = vdupq_n_u16(vabove_row[-1]);
      const uint8x8_t v_uabove = vld1_u8(uabove_row);
      const uint8x8_t v_vabove = vld1_u8(vabove_row);
      for (i = 0; i < 8; ++i) {
        const uint8x8_t v_uleft = vdup_n_u8(uleft[0]);
        const uint8x8_t v_vleft = vdup_n_u8(vleft[0]);
        /* above + left, widened to 16 bits (at most 510). */
        const uint16x8_t a_u = vaddl_u8(v_uabove, v_uleft);
        const uint16x8_t a_v = vaddl_u8(v_vabove, v_vleft);
        /* Subtract the top-left sample in signed arithmetic; the result
         * lies in -255..510. */
        const int16x8_t b_u = vsubq_s16(vreinterpretq_s16_u16(a_u),
                                        vreinterpretq_s16_u16(v_utop_left));
        const int16x8_t b_v = vsubq_s16(vreinterpretq_s16_u16(a_v),
                                        vreinterpretq_s16_u16(v_vtop_left));
        /* Saturating narrow clamps to 0..255. */
        const uint8x8_t pred_u = vqmovun_s16(b_u);
        const uint8x8_t pred_v = vqmovun_s16(b_v);

        vst1_u8(upred_ptr, pred_u);
        vst1_u8(vpred_ptr, pred_v);
        upred_ptr += pred_stride;
        vpred_ptr += pred_stride;
        uleft += left_stride;
        vleft += left_stride;
      }
    }
    break;
    default:
      /* No whole-macroblock predictor is built here for other modes. */
      break;
  }
}
211