;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "third_party/x86inc/x86inc.asm"

; Syntax: NASM/YASM (Intel operand order) through the macro layer pulled in
; above.  cglobal, DEFINE_ARGS, GET_GOT, GLOBAL, RESTORE_GOT, RET, REP_RET,
; mova and the mN register aliases all come from that include.
;
; Every routine below takes its arguments in the order named on its cglobal
; line: dst, stride, above[, left].  All of them fill an NxN block of bytes
; at dst, advancing by stride bytes per row.
;
; NOTE(review): mova is used for the 16-byte loads and stores in the XMM
; routines; presumably this requires above, left and every dst row to be
; 16-byte aligned -- confirm against the include's definition and callers.
; NOTE(review): the INIT_MMX routines write mm registers and no emms is
; visible in this file -- confirm that callers (or RET) take care of it.

SECTION_RODATA
; Rounding terms for the DC averages (half of the number of samples summed).
pw_4:   times 8 dw 4
pw_8:   times 8 dw 8
pw_16:  times 8 dw 16
pw_32:  times 8 dw 32

SECTION .text

;------------------------------------------------------------------------------
; dc_predictor_4x4
;   Fills a 4x4 block with (sum(above[0..3]) + sum(left[0..3]) + 4) >> 3.
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal dc_predictor_4x4, 4, 5, 2, dst, stride, above, left, goffset
    GET_GOT     goffsetq

    movd        m0, [aboveq]                ; low dword  <- above[0..3]
    punpckldq   m0, [leftq]                 ; high dword <- left[0..3]
    pxor        m1, m1
    psadbw      m0, m1                      ; word 0 = sum of the 8 bytes
    paddw       m0, [GLOBAL(pw_4)]          ; round
    psraw       m0, 3                       ; / 8
    pshufw      m0, m0, 0x00                ; replicate word 0
    packuswb    m0, m0                      ; words -> bytes
    movd        [dstq], m0                  ; row 0
    movd        [dstq+strideq], m0          ; row 1
    lea         dstq, [dstq+strideq*2]
    movd        [dstq], m0                  ; row 2
    movd        [dstq+strideq], m0          ; row 3

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_predictor_8x8
;   Fills an 8x8 block with (sum(above[0..7]) + sum(left[0..7]) + 8) >> 4.
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset
    GET_GOT     goffsetq

    movq        m0, [aboveq]
    movq        m2, [leftq]
    pxor        m1, m1
    psadbw      m0, m1                      ; sum(above)
    psadbw      m2, m1                      ; sum(left)
    paddw       m0, m2
    paddw       m0, [GLOBAL(pw_8)]          ; round
    psraw       m0, 4                       ; / 16
    pshufw      m0, m0, 0x00
    packuswb    m0, m0

    ; above has been consumed; its register now holds 3*stride.
    DEFINE_ARGS dst, stride, stride3
    lea         stride3q, [strideq*3]
    movq        [dstq], m0                  ; rows 0-3
    movq        [dstq+strideq], m0
    movq        [dstq+strideq*2], m0
    movq        [dstq+stride3q], m0
    lea         dstq, [dstq+strideq*4]
    movq        [dstq], m0                  ; rows 4-7
    movq        [dstq+strideq], m0
    movq        [dstq+strideq*2], m0
    movq        [dstq+stride3q], m0

    RESTORE_GOT
    RET

;------------------------------------------------------------------------------
; dc_predictor_16x16
;   Fills a 16x16 block with (sum(above[0..15]) + sum(left[0..15]) + 16) >> 5.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset
    GET_GOT     goffsetq

    mova        m0, [aboveq]
    mova        m2, [leftq]
    pxor        m1, m1
    psadbw      m0, m1                      ; one partial sum per 8-byte half
    psadbw      m2, m1
    paddw       m0, m2
    movhlps     m2, m0                      ; bring the upper-half sum down
    paddw       m0, m2                      ; word 0 = total of 32 bytes
    paddw       m0, [GLOBAL(pw_16)]         ; round
    psraw       m0, 5                       ; / 32
    pshuflw     m0, m0, 0x00
    punpcklqdq  m0, m0
    packuswb    m0, m0                      ; 16 copies of the DC byte

    ; above/left have been consumed; reuse their registers for 3*stride and
    ; the count of 4-row groups.
    DEFINE_ARGS dst, stride, stride3, rows4
    lea         stride3q, [strideq*3]
    mov         rows4d, 4
.store_rows:
    mova        [dstq], m0
    mova        [dstq+strideq], m0
    mova        [dstq+strideq*2], m0
    mova        [dstq+stride3q], m0
    lea         dstq, [dstq+strideq*4]
    dec         rows4d
    jnz         .store_rows

    RESTORE_GOT
    REP_RET

;------------------------------------------------------------------------------
; dc_predictor_32x32
;   Fills a 32x32 block with (sum(above[0..31]) + sum(left[0..31]) + 32) >> 6.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset
    GET_GOT     goffsetq

    mova        m0, [aboveq]
    mova        m2, [aboveq+16]
    mova        m3, [leftq]
    mova        m4, [leftq+16]
    pxor        m1, m1
    psadbw      m0, m1
    psadbw      m2, m1
    psadbw      m3, m1
    psadbw      m4, m1
    paddw       m0, m2
    paddw       m0, m3
    paddw       m0, m4
    movhlps     m2, m0                      ; bring the upper-half sum down
    paddw       m0, m2                      ; word 0 = total of 64 bytes
    paddw       m0, [GLOBAL(pw_32)]         ; round
    psraw       m0, 6                       ; / 64
    pshuflw     m0, m0, 0x00
    punpcklqdq  m0, m0
    packuswb    m0, m0                      ; 16 copies of the DC byte

    ; above/left have been consumed; reuse their registers for 3*stride and
    ; the count of 4-row groups.
    DEFINE_ARGS dst, stride, stride3, rows4
    lea         stride3q, [strideq*3]
    mov         rows4d, 8
.store_rows:
    mova        [dstq], m0
    mova        [dstq+16], m0
    mova        [dstq+strideq], m0
    mova        [dstq+strideq+16], m0
    mova        [dstq+strideq*2], m0
    mova        [dstq+strideq*2+16], m0
    mova        [dstq+stride3q], m0
    mova        [dstq+stride3q+16], m0
    lea         dstq, [dstq+strideq*4]
    dec         rows4d
    jnz         .store_rows

    RESTORE_GOT
    REP_RET

;------------------------------------------------------------------------------
; v_predictor_4x4
;   Copies above[0..3] into each of the 4 rows of dst.
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above
    movd        m0, [aboveq]
    movd        [dstq], m0
    movd        [dstq+strideq], m0
    lea         dstq, [dstq+strideq*2]
    movd        [dstq], m0
    movd        [dstq+strideq], m0
    RET

;------------------------------------------------------------------------------
; v_predictor_8x8
;   Copies above[0..7] into each of the 8 rows of dst.
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above
    movq        m0, [aboveq]
    ; above is in m0 now; its register is reused for 3*stride.
    DEFINE_ARGS dst, stride, stride3
    lea         stride3q, [strideq*3]
    movq        [dstq], m0                  ; rows 0-3
    movq        [dstq+strideq], m0
    movq        [dstq+strideq*2], m0
    movq        [dstq+stride3q], m0
    lea         dstq, [dstq+strideq*4]
    movq        [dstq], m0                  ; rows 4-7
    movq        [dstq+strideq], m0
    movq        [dstq+strideq*2], m0
    movq        [dstq+stride3q], m0
    RET

;------------------------------------------------------------------------------
; v_predictor_16x16
;   Copies above[0..15] into each of the 16 rows of dst, 4 rows per pass.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above
    mova        m0, [aboveq]
    ; above is in m0 now; its register is reused for 3*stride.
    DEFINE_ARGS dst, stride, stride3, rows4
    lea         stride3q, [strideq*3]
    mov         rows4d, 4
.store_rows:
    mova        [dstq], m0
    mova        [dstq+strideq], m0
    mova        [dstq+strideq*2], m0
    mova        [dstq+stride3q], m0
    lea         dstq, [dstq+strideq*4]
    dec         rows4d
    jnz         .store_rows
    REP_RET

;------------------------------------------------------------------------------
; v_predictor_32x32
;   Copies above[0..31] into each of the 32 rows of dst, 4 rows per pass.
;------------------------------------------------------------------------------
INIT_XMM sse2
cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above
    mova        m0, [aboveq]                ; columns  0-15
    mova        m1, [aboveq+16]             ; columns 16-31
    ; above is in m0/m1 now; its register is reused for 3*stride.
    DEFINE_ARGS dst, stride, stride3, rows4
    lea         stride3q, [strideq*3]
    mov         rows4d, 8
.store_rows:
    mova        [dstq], m0
    mova        [dstq+16], m1
    mova        [dstq+strideq], m0
    mova        [dstq+strideq+16], m1
    mova        [dstq+strideq*2], m0
    mova        [dstq+strideq*2+16], m1
    mova        [dstq+stride3q], m0
    mova        [dstq+stride3q+16], m1
    lea         dstq, [dstq+strideq*4]
    dec         rows4d
    jnz         .store_rows
    REP_RET

;------------------------------------------------------------------------------
; tm_predictor_NxN (all four sizes below)
;   dst[y][x] = saturate_u8(left[y] + above[x] - above[-1])
;   The per-column term (above[x] - above[-1]) is computed once as 16-bit
;   words; each loop pass adds a broadcast left[y] for two consecutive rows
;   and packuswb saturates the result back to bytes.
;
;   The loop counter runs from -(N/2) up to 0 and left is pre-advanced by N,
;   so [left + counter*2] walks left[0], left[2], ... two rows at a time.
;
;   NOTE(review): each left sample is fetched with a 4-byte movd although
;   only its first byte is used, so the final pass reads up to 3 bytes past
;   left[N-1].  Confirm the caller's buffer makes that safe.
;------------------------------------------------------------------------------
INIT_MMX sse
cglobal tm_predictor_4x4, 4, 4, 4, dst, stride, above, left
    pxor        m1, m1
    movd        m2, [aboveq-1]              ; byte 0 = above[-1]
    movd        m0, [aboveq]                ; above[0..3]
    punpcklbw   m2, m1                      ; bytes -> words
    punpcklbw   m0, m1
    pshufw      m2, m2, 0x00                ; broadcast above[-1]
    psubw       m0, m2                      ; m0 = above[x] - above[-1]

    ; above has been consumed; its register becomes the row-pair counter.
    DEFINE_ARGS dst, stride, pair, left
    mov         pairq, -2
    add         leftq, 4
.row_pair:
    movd        m2, [leftq+pairq*2]         ; byte 0 = left[y]
    movd        m3, [leftq+pairq*2+1]       ; byte 0 = left[y+1]
    punpcklbw   m2, m1
    punpcklbw   m3, m1
    pshufw      m2, m2, 0x00                ; broadcast left[y]
    pshufw      m3, m3, 0x00                ; broadcast left[y+1]
    paddw       m2, m0
    paddw       m3, m0
    packuswb    m2, m2                      ; saturate to bytes
    packuswb    m3, m3
    movd        [dstq], m2                  ; row y
    movd        [dstq+strideq], m3          ; row y+1
    lea         dstq, [dstq+strideq*2]      ; lea leaves flags untouched
    inc         pairq
    jnz         .row_pair
    REP_RET

INIT_XMM sse2
cglobal tm_predictor_8x8, 4, 4, 4, dst, stride, above, left
    pxor        m1, m1
    movd        m2, [aboveq-1]              ; byte 0 = above[-1]
    movq        m0, [aboveq]                ; above[0..7]
    punpcklbw   m2, m1                      ; bytes -> words
    punpcklbw   m0, m1
    pshuflw     m2, m2, 0x00
    punpcklqdq  m2, m2                      ; broadcast above[-1] to 8 words
    psubw       m0, m2                      ; m0 = above[x] - above[-1]

    ; above has been consumed; its register becomes the row-pair counter.
    DEFINE_ARGS dst, stride, pair, left
    mov         pairq, -4
    add         leftq, 8
.row_pair:
    movd        m2, [leftq+pairq*2]         ; byte 0 = left[y]
    movd        m3, [leftq+pairq*2+1]       ; byte 0 = left[y+1]
    punpcklbw   m2, m1
    punpcklbw   m3, m1
    pshuflw     m2, m2, 0x00
    pshuflw     m3, m3, 0x00
    punpcklqdq  m2, m2                      ; broadcast left[y]
    punpcklqdq  m3, m3                      ; broadcast left[y+1]
    paddw       m2, m0
    paddw       m3, m0
    packuswb    m2, m3                      ; low half = row y, high = row y+1
    movq        [dstq], m2
    movhps      [dstq+strideq], m2
    lea         dstq, [dstq+strideq*2]      ; lea leaves flags untouched
    inc         pairq
    jnz         .row_pair
    REP_RET

INIT_XMM sse2
cglobal tm_predictor_16x16, 4, 4, 7, dst, stride, above, left
    pxor        m1, m1
    movd        m2, [aboveq-1]              ; byte 0 = above[-1]
    mova        m0, [aboveq]                ; above[0..15]
    punpcklbw   m2, m1
    punpckhbw   m4, m0, m1                  ; m4 = above[8..15] as words
    punpcklbw   m0, m1                      ; m0 = above[0..7]  as words
    pshuflw     m2, m2, 0x00
    punpcklqdq  m2, m2                      ; broadcast above[-1] to 8 words
    psubw       m0, m2                      ; columns 0-7  minus above[-1]
    psubw       m4, m2                      ; columns 8-15 minus above[-1]

    ; above has been consumed; its register becomes the row-pair counter.
    DEFINE_ARGS dst, stride, pair, left
    mov         pairq, -8
    add         leftq, 16
.row_pair:
    movd        m2, [leftq+pairq*2]         ; byte 0 = left[y]
    movd        m3, [leftq+pairq*2+1]       ; byte 0 = left[y+1]
    punpcklbw   m2, m1
    punpcklbw   m3, m1
    pshuflw     m2, m2, 0x00
    pshuflw     m3, m3, 0x00
    punpcklqdq  m2, m2                      ; broadcast left[y]
    punpcklqdq  m3, m3                      ; broadcast left[y+1]
    paddw       m5, m2, m0                  ; row y,   columns 0-7
    paddw       m6, m3, m0                  ; row y+1, columns 0-7
    paddw       m2, m4                      ; row y,   columns 8-15
    paddw       m3, m4                      ; row y+1, columns 8-15
    packuswb    m5, m2
    packuswb    m6, m3
    mova        [dstq], m5
    mova        [dstq+strideq], m6
    lea         dstq, [dstq+strideq*2]      ; lea leaves flags untouched
    inc         pairq
    jnz         .row_pair
    REP_RET

; The 32x32 variant uses m0-m9, so it is only built when ARCH_X86_64 is set.
%if ARCH_X86_64
INIT_XMM sse2
cglobal tm_predictor_32x32, 4, 4, 10, dst, stride, above, left
    pxor        m1, m1
    movd        m2, [aboveq-1]              ; byte 0 = above[-1]
    mova        m0, [aboveq]                ; above[0..15]
    mova        m4, [aboveq+16]             ; above[16..31]
    punpcklbw   m2, m1
    punpckhbw   m3, m0, m1                  ; m3 = above[8..15]  as words
    punpckhbw   m5, m4, m1                  ; m5 = above[24..31] as words
    punpcklbw   m0, m1                      ; m0 = above[0..7]   as words
    punpcklbw   m4, m1                      ; m4 = above[16..23] as words
    pshuflw     m2, m2, 0x00
    punpcklqdq  m2, m2                      ; broadcast above[-1] to 8 words
    psubw       m0, m2
    psubw       m3, m2
    psubw       m4, m2
    psubw       m5, m2

    ; above has been consumed; its register becomes the row-pair counter.
    DEFINE_ARGS dst, stride, pair, left
    mov         pairq, -16
    add         leftq, 32
.row_pair:
    movd        m2, [leftq+pairq*2]         ; byte 0 = left[y]
    movd        m6, [leftq+pairq*2+1]       ; byte 0 = left[y+1]
    punpcklbw   m2, m1
    punpcklbw   m6, m1
    pshuflw     m2, m2, 0x00
    pshuflw     m6, m6, 0x00
    punpcklqdq  m2, m2                      ; broadcast left[y]
    punpcklqdq  m6, m6                      ; broadcast left[y+1]
    paddw       m7, m2, m0                  ; row y, columns  0-7
    paddw       m8, m2, m3                  ; row y, columns  8-15
    paddw       m9, m2, m4                  ; row y, columns 16-23
    paddw       m2, m5                      ; row y, columns 24-31
    packuswb    m7, m8
    packuswb    m9, m2
    paddw       m2, m6, m0                  ; row y+1, columns 0-7
    paddw       m8, m6, m3                  ; row y+1, columns 8-15
    mova        [dstq], m7
    paddw       m7, m6, m4                  ; row y+1, columns 16-23
    paddw       m6, m5                      ; row y+1, columns 24-31
    mova        [dstq+16], m9
    packuswb    m2, m8
    packuswb    m7, m6
    mova        [dstq+strideq], m2
    mova        [dstq+strideq+16], m7
    lea         dstq, [dstq+strideq*2]      ; lea leaves flags untouched
    inc         pairq
    jnz         .row_pair
    REP_RET
%endif