sad8_neon.asm revision 90d3ed91ae9228e1c8bab561b6138d4cb8c1e4fd
;
;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license and patent
;  grant that can be found in the LICENSE file in the root of the source
;  tree. All contributing project authors may be found in the AUTHORS
;  file in the root of the source tree.
;

; Sum-of-absolute-differences (SAD) kernels for 8x8, 8x16 and 4x4 blocks.
;
; Register usage shared by all three routines:
;   r0 / r1          source pointer / source stride (bytes)
;   r2 / r3          reference pointer / reference stride (bytes)
;   d0,d2,d4,d6      source rows (rotating)
;   d16,d18,d20,d22  reference rows (rotating)
;   q12 (d24,d25)    eight 16-bit per-column accumulators
;
; Only caller-saved NEON registers (d0-d7, d16-d31) are written. d8-d15 are
; callee-saved under the AAPCS, so they are deliberately left untouched;
; this lets the routines stay leaf functions with no stack traffic.

    EXPORT  |vp8_sad8x8_neon|
    EXPORT  |vp8_sad8x16_neon|
    EXPORT  |vp8_sad4x4_neon|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;============================
; unsigned int vp8_sad8x8_neon(
;     unsigned char *src_ptr,       r0
;     int  src_stride,              r1
;     unsigned char *ref_ptr,       r2
;     int  ref_stride)              r3
;
; Returns in r0 the sum of |src - ref| over 8 rows of 8 bytes.
; Each 16-bit lane accumulates at most 8 * 255 = 2040, so it cannot overflow.
; Loads are interleaved with the accumulate of the previous row.

|vp8_sad8x8_neon| PROC
    vld1.8          {d0}, [r0], r1          ; row 0
    vld1.8          {d16}, [r2], r3

    vld1.8          {d2}, [r0], r1          ; row 1
    vld1.8          {d18}, [r2], r3

    vabdl.u8        q12, d0, d16            ; first row initialises q12

    vld1.8          {d4}, [r0], r1          ; row 2
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1          ; row 3
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20

    vld1.8          {d0}, [r0], r1          ; row 4
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1          ; row 5
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1          ; row 6
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1          ; row 7
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20
    vabal.u8        q12, d6, d22

    ; Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u64 -> one value.
    vpaddl.u16      q1, q12
    vpaddl.u32      q0, q1
    vadd.u32        d0, d0, d1

    vmov.32         r0, d0[0]

    bx              lr

    ENDP

;============================
; unsigned int vp8_sad8x16_neon(
;     unsigned char *src_ptr,       r0
;     int  src_stride,              r1
;     unsigned char *ref_ptr,       r2
;     int  ref_stride)              r3
;
; Returns in r0 the sum of |src - ref| over 16 rows of 8 bytes.
; Each 16-bit lane accumulates at most 16 * 255 = 4080, so it cannot overflow.

|vp8_sad8x16_neon| PROC
    vld1.8          {d0}, [r0], r1          ; row 0
    vld1.8          {d16}, [r2], r3

    vld1.8          {d2}, [r0], r1          ; row 1
    vld1.8          {d18}, [r2], r3

    vabdl.u8        q12, d0, d16            ; first row initialises q12

    vld1.8          {d4}, [r0], r1          ; row 2
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1          ; row 3
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20

    vld1.8          {d0}, [r0], r1          ; row 4
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1          ; row 5
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1          ; row 6
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1          ; row 7
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20

    vld1.8          {d0}, [r0], r1          ; row 8
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1          ; row 9
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1          ; row 10
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1          ; row 11
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20

    vld1.8          {d0}, [r0], r1          ; row 12
    vld1.8          {d16}, [r2], r3

    vabal.u8        q12, d6, d22

    vld1.8          {d2}, [r0], r1          ; row 13
    vld1.8          {d18}, [r2], r3

    vabal.u8        q12, d0, d16

    vld1.8          {d4}, [r0], r1          ; row 14
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1          ; row 15
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20
    vabal.u8        q12, d6, d22

    ; Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u64 -> one value.
    vpaddl.u16      q1, q12
    vpaddl.u32      q0, q1
    vadd.u32        d0, d0, d1

    vmov.32         r0, d0[0]

    bx              lr

    ENDP

;===========================
; unsigned int vp8_sad4x4_neon(
;     unsigned char *src_ptr,       r0
;     int  src_stride,              r1
;     unsigned char *ref_ptr,       r2
;     int  ref_stride)              r3
;
; Returns in r0 the sum of |src - ref| over 4 rows of 4 bytes.
;
; NOTE: every row is fetched with an 8-byte load, so 4 bytes past each 4-byte
; row are read from both buffers. Those extra columns land in d25 and are
; dropped by the reduction, which only consumes d24 (columns 0-3). Callers
; must make sure the over-read bytes are addressable.

|vp8_sad4x4_neon| PROC
    vld1.8          {d0}, [r0], r1          ; row 0
    vld1.8          {d16}, [r2], r3

    vld1.8          {d2}, [r0], r1          ; row 1
    vld1.8          {d18}, [r2], r3

    vabdl.u8        q12, d0, d16            ; first row initialises q12

    vld1.8          {d4}, [r0], r1          ; row 2
    vld1.8          {d20}, [r2], r3

    vabal.u8        q12, d2, d18

    vld1.8          {d6}, [r0], r1          ; row 3
    vld1.8          {d22}, [r2], r3

    vabal.u8        q12, d4, d20
    vabal.u8        q12, d6, d22

    ; Reduce only the low half of q12: 4 x u16 -> 2 x u32 -> 1 x u64.
    vpaddl.u16      d1, d24
    vpaddl.u32      d0, d1
    vmov.32         r0, d0[0]

    bx              lr

    ENDP

    END