Lines Matching defs:xmm1

33     xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));                      \
34 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
44 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
45 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
56 xmm1 = _mm_loadu_si128(&xmm0); \
59 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \
62 xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \
66 xmm1 = _mm_adds_epi16(xmm1, xmm4); \
69 xmm1 = _mm_srai_epi16(xmm1, 6); \
72 xmm1 = _mm_packus_epi16(xmm1, xmm1); \
77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
79 xmm1 = _mm_loadu_si128(&xmm0); \
81 xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \
83 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \
94 __m128i xmm0, xmm1, xmm2, xmm4;
114 __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
305 movdqa xmm1, xmm0
307 punpckhwd xmm1, xmm1
309 por xmm1, xmm5
311 movdqu [edx + 16], xmm1
363 movdqu xmm1, [eax + 16]
367 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
370 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
374 pshufb xmm1, xmm4
376 por xmm1, xmm5
379 movdqu [edx + 16], xmm1
402 movdqu xmm1, [eax + 16]
406 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
409 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
413 pshufb xmm1, xmm4
415 por xmm1, xmm5
418 movdqu [edx + 16], xmm1
440 movdqu xmm1, [eax + 4]
444 pshufb xmm1, xmm4
447 movq qword ptr [edx + 8], xmm1
489 movdqa xmm1, xmm0
491 pand xmm1, xmm3 // R in upper 5 bits
493 pmulhuw xmm1, xmm5 // * (256 + 8)
495 psllw xmm1, 8
496 por xmm1, xmm2 // RB
500 movdqa xmm2, xmm1
501 punpcklbw xmm1, xmm0
503 movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
685 movdqa xmm1, xmm0
687 psllw xmm1, 1 // R in upper 5 bits
689 pand xmm1, xmm3
691 pmulhuw xmm1, xmm5 // * (256 + 8)
692 psllw xmm1, 8
693 por xmm1, xmm2 // RB
700 movdqa xmm2, xmm1
701 punpcklbw xmm1, xmm0
703 movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
733 movdqa xmm1, xmm0
735 psllw xmm1, 4
737 por xmm0, xmm1
739 movdqa xmm1, xmm0
741 punpckhbw xmm1, xmm2
743 movdqu [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB
761 movdqu xmm1, [eax + 16]
766 pshufb xmm1, xmm6
769 movdqa xmm4, xmm1 // 4 bytes from 1 for 0
770 psrldq xmm1, 4 // 8 bytes from 1
776 por xmm1, xmm5 // 8 bytes from 2 for 1
780 movdqu [edx + 16], xmm1 // store 1
799 movdqu xmm1, [eax + 16]
804 pshufb xmm1, xmm6
807 movdqa xmm4, xmm1 // 4 bytes from 1 for 0
808 psrldq xmm1, 4 // 8 bytes from 1
814 por xmm1, xmm5 // 8 bytes from 2 for 1
818 movdqu [edx + 16], xmm1 // store 1
843 movdqa xmm1, xmm0 // B
846 psrld xmm1, 3 // B
849 pand xmm1, xmm3 // B
852 por xmm1, xmm2 // BG
853 por xmm0, xmm1 // BGR
888 movdqa xmm1, xmm0 // B
891 psrld xmm1, 3 // B
894 pand xmm1, xmm3 // B
897 por xmm1, xmm2 // BG
898 por xmm0, xmm1 // BGR
970 movdqa xmm1, xmm0 // B
974 psrld xmm1, 3 // B
978 pand xmm1, xmm4 // B
981 por xmm0, xmm1 // BA
1007 movdqa xmm1, xmm0
1009 pand xmm1, xmm4 // high nibble
1011 psrld xmm1, 8
1012 por xmm0, xmm1
1143 movdqu xmm1, [eax + 16]
1147 pmaddubsw xmm1, xmm4
1151 phaddw xmm0, xmm1
1178 movdqu xmm1, [eax + 16]
1182 pmaddubsw xmm1, xmm4
1186 phaddw xmm0, xmm1
1297 movdqu xmm1, [eax + 16]
1301 pmaddubsw xmm1, xmm4
1305 phaddw xmm0, xmm1
1330 movdqu xmm1, [eax + 16]
1334 pmaddubsw xmm1, xmm4
1338 phaddw xmm0, xmm1
1363 movdqu xmm1, [eax + 16]
1367 pmaddubsw xmm1, xmm4
1371 phaddw xmm0, xmm1
1406 movdqu xmm1, [eax + 16]
1408 pavgb xmm1, xmm4
1418 shufps xmm0, xmm1, 0x88
1419 shufps xmm4, xmm1, 0xdd
1429 movdqa xmm1, xmm0
1433 pmaddubsw xmm1, xmm6 // V
1436 phaddw xmm1, xmm3
1438 psraw xmm1, 8
1439 packsswb xmm0, xmm1
1476 movdqu xmm1, [eax + 16]
1478 pavgb xmm1, xmm4
1488 shufps xmm0, xmm1, 0x88
1489 shufps xmm4, xmm1, 0xdd
1499 movdqa xmm1, xmm0
1503 pmaddubsw xmm1, xmm6 // V
1506 phaddw xmm1, xmm3
1508 paddw xmm1, xmm5
1510 psraw xmm1, 8
1511 packsswb xmm0, xmm1
1676 movdqu xmm1, [eax + 16]
1680 pmaddubsw xmm1, xmm7
1683 phaddw xmm0, xmm1
1692 movdqu xmm1, [eax + 16]
1696 pmaddubsw xmm1, xmm6
1699 phaddw xmm0, xmm1
1737 movdqu xmm1, [eax + 16]
1739 pavgb xmm1, xmm4
1749 shufps xmm0, xmm1, 0x88
1750 shufps xmm4, xmm1, 0xdd
1760 movdqa xmm1, xmm0
1764 pmaddubsw xmm1, xmm6 // V
1767 phaddw xmm1, xmm3
1769 psraw xmm1, 8
1770 packsswb xmm0, xmm1
1807 movdqu xmm1, [eax + 16]
1809 pavgb xmm1, xmm4
1819 shufps xmm0, xmm1, 0x88
1820 shufps xmm4, xmm1, 0xdd
1830 movdqa xmm1, xmm0
1834 pmaddubsw xmm1, xmm6 // V
1837 phaddw xmm1, xmm3
1839 psraw xmm1, 8
1840 packsswb xmm0, xmm1
1877 movdqu xmm1, [eax + 16]
1879 pavgb xmm1, xmm4
1889 shufps xmm0, xmm1, 0x88
1890 shufps xmm4, xmm1, 0xdd
1900 movdqa xmm1, xmm0
1904 pmaddubsw xmm1, xmm6 // V
1907 phaddw xmm1, xmm3
1909 psraw xmm1, 8
1910 packsswb xmm0, xmm1
1930 __asm vmovdqu xmm1, [esi + edi] /* V */ \
1944 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
1958 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
1975 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \
2420 __asm movq xmm1, qword ptr [esi + edi] /* V */ \
2422 __asm punpcklbw xmm0, xmm1 /* UV */ \
2431 __asm movd xmm1, [esi + edi] /* V */ \
2433 __asm punpcklbw xmm0, xmm1 /* UV */ \
2443 __asm movd xmm1, [esi + edi] /* V */ \
2445 __asm punpcklbw xmm0, xmm1 /* UV */ \
2457 // __asm pinsrw xmm1, [esi + edi], 0 /* V */
2462 __asm movd xmm1, ebx \
2464 __asm punpcklbw xmm0, xmm1 /* UV */ \
2512 __asm movdqa xmm1, xmm0 \
2516 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \
2517 __asm psubw xmm0, xmm1 \
2518 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \
2520 __asm psubw xmm1, xmm2 \
2526 __asm paddsw xmm1, xmm4 /* G += Y */ \
2529 __asm psraw xmm1, 6 \
2532 __asm packuswb xmm1, xmm1 /* G */ \
2538 __asm punpcklbw xmm0, xmm1 /* BG */ \
2540 __asm movdqa xmm1, xmm0 \
2542 __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \
2544 __asm movdqu 16[edx], xmm1 \
2551 __asm punpcklbw xmm1, xmm0 /* GB */ \
2554 __asm punpcklwd xmm5, xmm1 /* BGRA first 4 pixels */ \
2555 __asm punpckhwd xmm0, xmm1 /* BGRA next 4 pixels */ \
2564 __asm punpcklbw xmm1, xmm2 /* GR */ \
2567 __asm punpcklwd xmm5, xmm1 /* RGBA first 4 pixels */ \
2568 __asm punpckhwd xmm0, xmm1 /* RGBA next 4 pixels */ \
2577 __asm punpcklbw xmm0, xmm1 /* BG */ \
2579 __asm movdqa xmm1, xmm0 \
2581 __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
2584 __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \
2585 __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \
2587 __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \
2594 __asm punpcklbw xmm0, xmm1 /* BG */ \
2596 __asm movdqa xmm1, xmm0 \
2598 __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ \
2611 __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \
2612 __asm movdqa xmm2, xmm1 /* G */ \
2613 __asm pslld xmm1, 8 /* R */ \
2616 __asm psrad xmm1, 16 /* R */ \
2619 __asm pand xmm1, xmm7 /* R */ \
2621 __asm por xmm1, xmm3 /* BGR */ \
2622 __asm packssdw xmm0, xmm1 \
3049 movdqa xmm1, xmm0
3051 punpckhwd xmm1, xmm1 // BGRA next 4 pixels
3053 por xmm1, xmm4
3055 movdqu [edx + 16], xmm1
3180 movdqa xmm1, xmmword ptr kShuffleMirrorUV
3187 pshufb xmm0, xmm1
3264 movdqu xmm1, [eax + 16]
3267 movdqa xmm3, xmm1
3269 pand xmm1, xmm5
3270 packuswb xmm0, xmm1
3340 movdqu xmm1, [eax + edx] // and 16 V's
3343 punpcklbw xmm0, xmm1 // first 8 UV pairs
3344 punpckhbw xmm2, xmm1 // next 8 UV pairs
3405 movdqa xmm1, [eax + 16]
3408 movdqa [edx + 16], xmm1
3416 movdqu xmm1, [eax + 16]
3419 movdqu [edx + 16], xmm1
3479 pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
3480 psrld xmm1, 8
3490 pand xmm4, xmm1
3491 pand xmm5, xmm1
3545 movdqu xmm1, [eax + 16]
3548 psrld xmm1, 24
3549 packssdw xmm0, xmm1
3571 pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
3572 psrld xmm1, 8
3584 pand xmm4, xmm1
3585 pand xmm5, xmm1
3910 movdqu xmm1, [eax + 16]
3913 pand xmm1, xmm5
3914 packuswb xmm0, xmm1
3940 movdqu xmm1, [eax + 16]
3945 pavgb xmm1, xmm3
3947 psrlw xmm1, 8
3948 packuswb xmm0, xmm1
3949 movdqa xmm1, xmm0
3952 psrlw xmm1, 8 // V
3953 packuswb xmm1, xmm1
3955 movq qword ptr [edx + edi], xmm1
3981 movdqu xmm1, [eax + 16]
3984 psrlw xmm1, 8
3985 packuswb xmm0, xmm1
3986 movdqa xmm1, xmm0
3989 psrlw xmm1, 8 // V
3990 packuswb xmm1, xmm1
3992 movq qword ptr [edx + edi], xmm1
4012 movdqu xmm1, [eax + 16]
4015 psrlw xmm1, 8
4016 packuswb xmm0, xmm1
4042 movdqu xmm1, [eax + 16]
4047 pavgb xmm1, xmm3
4049 pand xmm1, xmm5
4050 packuswb xmm0, xmm1
4051 movdqa xmm1, xmm0
4054 psrlw xmm1, 8 // V
4055 packuswb xmm1, xmm1
4057 movq qword ptr [edx + edi], xmm1
4083 movdqu xmm1, [eax + 16]
4086 pand xmm1, xmm5
4087 packuswb xmm0, xmm1
4088 movdqa xmm1, xmm0
4091 psrlw xmm1, 8 // V
4092 packuswb xmm1, xmm1
4094 movq qword ptr [edx + edi], xmm1
4140 movq xmm1, qword ptr [eax + esi] // src0
4142 punpcklbw xmm1, xmm2
4143 psubb xmm1, xmm6 // bias src0/1 - 128
4144 pmaddubsw xmm0, xmm1
4261 movdqu xmm1, [esi] // _a_g
4263 psrlw xmm1, 8 // _a_g
4265 pmullw xmm1, xmm3 // _a_g * alpha
4268 pand xmm1, xmm5 // a_g_ convert to 8 bits again
4269 paddusb xmm0, xmm1 // + src argb
4290 movd xmm1, [esi] // _a_g
4292 psrlw xmm1, 8 // _a_g
4294 pmullw xmm1, xmm3 // _a_g * alpha
4297 pand xmm1, xmm5 // a_g_ convert to 8 bits again
4298 paddusb xmm0, xmm1 // + src argb
4334 movdqu xmm1, [eax] // read 4 pixels
4335 punpcklbw xmm1, xmm1 // first 2 pixel rgbs
4336 pmulhuw xmm0, xmm1 // rgb * a
4337 movdqu xmm1, [eax] // read 4 pixels
4338 pshufb xmm1, xmm5 // isolate next 2 alphas
4341 pmulhuw xmm1, xmm2 // rgb * a
4346 psrlw xmm1, 8
4347 packuswb xmm0, xmm1
4425 movdqu xmm1, [eax] // read 4 pixels
4428 punpckhbw xmm1, xmm1 // next 2
4434 pmulhuw xmm1, xmm2 // rgb * a
4436 packuswb xmm0, xmm1
4512 vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1]
4515 vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
4522 vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5]
4525 vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
4572 movdqu xmm1, [eax + 16]
4574 pmaddubsw xmm1, xmm4
4575 phaddw xmm0, xmm1
4589 movdqa xmm1, xmm0
4591 punpckhwd xmm1, xmm3 // GGGA next 4
4593 movdqu [edx + 16], xmm1
4638 movdqu xmm1, [eax + 16]
4640 pmaddubsw xmm1, xmm3
4641 phaddw xmm5, xmm1
4646 movdqu xmm1, [eax + 16]
4648 pmaddubsw xmm1, xmm4
4649 phaddw xmm5, xmm1
4653 movdqu xmm1, [eax + 16]
4655 psrld xmm1, 24
4656 packuswb xmm6, xmm1
4659 movdqa xmm1, xmm0 // Weave BG, RA together
4661 punpckhwd xmm1, xmm5 // BGRA next 4
4663 movdqu [eax + 16], xmm1
4697 movdqu xmm1, [eax + 16]
4699 pmaddubsw xmm1, xmm3
4701 phaddsw xmm6, xmm1 // G
4707 movdqu xmm1, [eax] // R
4709 pmaddubsw xmm1, xmm4
4711 phaddsw xmm1, xmm7 // R
4717 psraw xmm1, 6 // R
4719 packuswb xmm1, xmm1 // 8 R values
4721 punpcklbw xmm1, xmm6 // 8 RA values
4723 punpcklwd xmm0, xmm1 // BGRA first 4
4724 punpckhwd xmm6, xmm1 // BGRA next 4
4761 movdqu xmm1, [eax] // read 4 pixels
4762 punpckhbw xmm1, xmm5 // next 2 pixels
4763 pmulhuw xmm1, xmm2
4766 pmullw xmm1, xmm3
4769 paddw xmm1, xmm4
4770 packuswb xmm0, xmm1
4797 movdqa xmm1, xmm0
4799 punpckhbw xmm1, xmm1 // next 2
4801 pmulhuw xmm1, xmm2 // argb * value
4803 psrlw xmm1, 8
4804 packuswb xmm0, xmm1
4831 movdqu xmm1, xmm0
4834 punpckhbw xmm1, xmm1 // next 2
4838 pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2
4841 packuswb xmm0, xmm1
4872 movdqu xmm1, [esi] // read 4 pixels from src_argb1
4874 paddusb xmm0, xmm1 // src_argb0 + src_argb1
4887 movd xmm1, [esi] // read 1 pixels from src_argb1
4889 paddusb xmm0, xmm1 // src_argb0 + src_argb1
4917 movdqu xmm1, [esi] // read 4 pixels from src_argb1
4919 psubusb xmm0, xmm1 // src_argb0 - src_argb1
5049 movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
5051 punpcklbw xmm1, xmm5
5052 psubw xmm0, xmm1
5053 movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
5055 punpcklbw xmm1, xmm5
5057 psubw xmm1, xmm2
5064 paddw xmm0, xmm1
5065 paddw xmm0, xmm1
5066 pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
5067 psubw xmm1, xmm0
5068 pmaxsw xmm0, xmm1
5102 movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
5104 punpcklbw xmm1, xmm5
5105 psubw xmm0, xmm1
5106 movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1]
5108 punpcklbw xmm1, xmm5
5110 psubw xmm1, xmm2
5117 paddw xmm0, xmm1
5118 paddw xmm0, xmm1
5119 pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
5120 psubw xmm1, xmm0
5121 pmaxsw xmm0, xmm1
5155 movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
5157 paddusb xmm0, xmm1 // sobel = sobelx + sobely
5161 movdqa xmm1, xmm2 // GGGG
5162 punpcklwd xmm1, xmm2 // First 4
5164 por xmm1, xmm5 // GGGA
5171 movdqu [edx], xmm1
5200 movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
5202 paddusb xmm0, xmm1 // sobel = sobelx + sobely
5234 movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
5237 paddusb xmm2, xmm1 // sobel = sobelx + sobely
5241 movdqa xmm4, xmm1 // YS
5243 punpckhbw xmm1, xmm2
5247 movdqa xmm7, xmm1 // YSXA
5249 punpckhwd xmm1, xmm0 // Last 4
5253 movdqu [edx + 48], xmm1
5310 movdqu xmm1, [eax + 16]
5316 psubd xmm1, [eax + edx * 4 + 16]
5323 psubd xmm1, [esi + 16]
5329 paddd xmm1, [esi + edx * 4 + 16]
5334 packssdw xmm0, xmm1 // pack 4 pixels into 2 registers
5352 movdqu xmm1, [eax + 16]
5358 psubd xmm1, [eax + edx * 4 + 16]
5365 psubd xmm1, [esi + 16]
5371 paddd xmm1, [esi + edx * 4 + 16]
5377 cvtdq2ps xmm1, xmm1
5379 mulps xmm1, xmm4
5385 cvtps2dq xmm1, xmm1
5388 packssdw xmm0, xmm1
5433 pxor xmm1, xmm1
5446 punpcklbw xmm2, xmm1
5448 punpcklwd xmm2, xmm1
5449 punpckhwd xmm3, xmm1
5451 punpckhbw xmm4, xmm1
5453 punpcklwd xmm4, xmm1
5454 punpckhwd xmm5, xmm1
5490 punpcklbw xmm2, xmm1
5491 punpcklwd xmm2, xmm1
5543 cvttps2dq xmm1, xmm3 // x, y float to int next 2
5544 packssdw xmm0, xmm1 // x, y as 8 shorts
5550 movd xmm1, [eax + esi] // read pixel 0
5552 punpckldq xmm1, xmm6 // combine pixel 0 and 1
5554 movq qword ptr [edx], xmm1
5703 movdqu xmm1, xmm0
5705 punpckhbw xmm1, xmm2
5707 psubb xmm1, xmm4
5711 pmaddubsw xmm3, xmm1
5726 movdqu xmm1, [esi + edx]
5727 pavgb xmm0, xmm1
5762 movdqu xmm1, [eax + 16]
5765 pshufb xmm1, xmm5
5767 movdqu [edx + 16], xmm1
5849 movdqa xmm1, xmm0
5851 punpckhbw xmm1, xmm5
5854 pshufhw xmm1, xmm1, 01Bh
5855 pshuflw xmm1, xmm1, 01Bh
5856 packuswb xmm0, xmm1
5866 movdqa xmm1, xmm0
5868 punpckhbw xmm1, xmm5
5871 pshufhw xmm1, xmm1, 039h
5872 pshuflw xmm1, xmm1, 039h
5873 packuswb xmm0, xmm1
5883 movdqa xmm1, xmm0
5885 punpckhbw xmm1, xmm5
5888 pshufhw xmm1, xmm1, 093h
5889 pshuflw xmm1, xmm1, 093h
5890 packuswb xmm0, xmm1
5900 movdqa xmm1, xmm0
5902 punpckhbw xmm1, xmm5
5905 pshufhw xmm1, xmm1, 0C6h
5906 pshuflw xmm1, xmm1, 0C6h
5907 packuswb xmm0, xmm1
5948 movdqa xmm1, xmm0
5950 punpckhbw xmm1, xmm2
5952 movdqu [edi + 16], xmm1
5984 movdqa xmm1, xmm2
5986 punpcklbw xmm1, xmm0 // UYVY
5988 movdqu [edi], xmm1
6025 movdqa xmm1, xmm0 // X
6031 movdqa xmm2, xmm1
6033 mulps xmm2, xmm1 // X * X
6035 mulps xmm1, xmm2 // X * X * X
6039 mulps xmm1, [esi + 48] // C3 * X * X * X
6043 addps xmm0, xmm1 // result += C3 * X * X * X