Lines Matching defs:xmm1

33   xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
34 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
44 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
45 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
56 xmm1 = _mm_loadu_si128(&xmm0); \
59 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \
62 xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \
66 xmm1 = _mm_adds_epi16(xmm1, xmm4); \
69 xmm1 = _mm_srai_epi16(xmm1, 6); \
72 xmm1 = _mm_packus_epi16(xmm1, xmm1); \
77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
79 xmm1 = _mm_loadu_si128(&xmm0); \
81 xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \
83 _mm_storeu_si128((__m128i*)(dst_argb + 16), xmm1); \
93 __m128i xmm0, xmm1, xmm2, xmm4;
113 __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
272 movdqa xmm1, xmm0
274 punpckhwd xmm1, xmm1
276 por xmm1, xmm5
278 movdqu [edx + 16], xmm1
332 movdqu xmm1, [eax + 16]
336 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
339 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
343 pshufb xmm1, xmm4
345 por xmm1, xmm5
348 movdqu [edx + 16], xmm1
371 movdqu xmm1, [eax + 16]
375 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]}
378 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]}
382 pshufb xmm1, xmm4
384 por xmm1, xmm5
387 movdqu [edx + 16], xmm1
410 movdqu xmm1, [eax + 4]
414 pshufb xmm1, xmm4
417 movq qword ptr [edx + 8], xmm1
459 movdqa xmm1, xmm0
461 pand xmm1, xmm3 // R in upper 5 bits
463 pmulhuw xmm1, xmm5 // * (256 + 8)
465 psllw xmm1, 8
466 por xmm1, xmm2 // RB
470 movdqa xmm2, xmm1
471 punpcklbw xmm1, xmm0
473 movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
655 movdqa xmm1, xmm0
657 psllw xmm1, 1 // R in upper 5 bits
659 pand xmm1, xmm3
661 pmulhuw xmm1, xmm5 // * (256 + 8)
662 psllw xmm1, 8
663 por xmm1, xmm2 // RB
670 movdqa xmm2, xmm1
671 punpcklbw xmm1, xmm0
673 movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB
703 movdqa xmm1, xmm0
705 psllw xmm1, 4
707 por xmm0, xmm1
709 movdqa xmm1, xmm0
711 punpckhbw xmm1, xmm2
713 movdqu [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB
732 movdqu xmm1, [eax + 16]
737 pshufb xmm1, xmm6
740 movdqa xmm4, xmm1 // 4 bytes from 1 for 0
741 psrldq xmm1, 4 // 8 bytes from 1
747 por xmm1, xmm5 // 8 bytes from 2 for 1
751 movdqu [edx + 16], xmm1 // store 1
771 movdqu xmm1, [eax + 16]
776 pshufb xmm1, xmm6
779 movdqa xmm4, xmm1 // 4 bytes from 1 for 0
780 psrldq xmm1, 4 // 8 bytes from 1
786 por xmm1, xmm5 // 8 bytes from 2 for 1
790 movdqu [edx + 16], xmm1 // store 1
816 movdqa xmm1, xmm0 // B
819 psrld xmm1, 3 // B
822 pand xmm1, xmm3 // B
825 por xmm1, xmm2 // BG
826 por xmm0, xmm1 // BGR
862 movdqa xmm1, xmm0 // B
865 psrld xmm1, 3 // B
868 pand xmm1, xmm3 // B
871 por xmm1, xmm2 // BG
872 por xmm0, xmm1 // BGR
946 movdqa xmm1, xmm0 // B
950 psrld xmm1, 3 // B
954 pand xmm1, xmm4 // B
957 por xmm0, xmm1 // BA
984 movdqa xmm1, xmm0
986 pand xmm1, xmm4 // high nibble
988 psrld xmm1, 8
989 por xmm0, xmm1
1124 movdqu xmm1, [eax + 16]
1128 pmaddubsw xmm1, xmm4
1132 phaddw xmm0, xmm1
1160 movdqu xmm1, [eax + 16]
1164 pmaddubsw xmm1, xmm4
1168 phaddw xmm0, xmm1
1280 movdqu xmm1, [eax + 16]
1284 pmaddubsw xmm1, xmm4
1288 phaddw xmm0, xmm1
1314 movdqu xmm1, [eax + 16]
1318 pmaddubsw xmm1, xmm4
1322 phaddw xmm0, xmm1
1348 movdqu xmm1, [eax + 16]
1352 pmaddubsw xmm1, xmm4
1356 phaddw xmm0, xmm1
1393 movdqu xmm1, [eax + 16]
1395 pavgb xmm1, xmm4
1405 shufps xmm0, xmm1, 0x88
1406 shufps xmm4, xmm1, 0xdd
1416 movdqa xmm1, xmm0
1420 pmaddubsw xmm1, xmm6 // V
1423 phaddw xmm1, xmm3
1425 psraw xmm1, 8
1426 packsswb xmm0, xmm1
1465 movdqu xmm1, [eax + 16]
1467 pavgb xmm1, xmm4
1477 shufps xmm0, xmm1, 0x88
1478 shufps xmm4, xmm1, 0xdd
1488 movdqa xmm1, xmm0
1492 pmaddubsw xmm1, xmm6 // V
1495 phaddw xmm1, xmm3
1497 paddw xmm1, xmm5
1499 psraw xmm1, 8
1500 packsswb xmm0, xmm1
1670 movdqu xmm1, [eax + 16]
1674 pmaddubsw xmm1, xmm7
1677 phaddw xmm0, xmm1
1686 movdqu xmm1, [eax + 16]
1690 pmaddubsw xmm1, xmm6
1693 phaddw xmm0, xmm1
1733 movdqu xmm1, [eax + 16]
1735 pavgb xmm1, xmm4
1745 shufps xmm0, xmm1, 0x88
1746 shufps xmm4, xmm1, 0xdd
1756 movdqa xmm1, xmm0
1760 pmaddubsw xmm1, xmm6 // V
1763 phaddw xmm1, xmm3
1765 psraw xmm1, 8
1766 packsswb xmm0, xmm1
1805 movdqu xmm1, [eax + 16]
1807 pavgb xmm1, xmm4
1817 shufps xmm0, xmm1, 0x88
1818 shufps xmm4, xmm1, 0xdd
1828 movdqa xmm1, xmm0
1832 pmaddubsw xmm1, xmm6 // V
1835 phaddw xmm1, xmm3
1837 psraw xmm1, 8
1838 packsswb xmm0, xmm1
1877 movdqu xmm1, [eax + 16]
1879 pavgb xmm1, xmm4
1889 shufps xmm0, xmm1, 0x88
1890 shufps xmm4, xmm1, 0xdd
1900 movdqa xmm1, xmm0
1904 pmaddubsw xmm1, xmm6 // V
1907 phaddw xmm1, xmm3
1909 psraw xmm1, 8
1910 packsswb xmm0, xmm1
1931 __asm vmovdqu xmm1, [esi + edi] /* V */ \
1945 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
1959 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
2366 __asm movq xmm1, qword ptr [esi + edi] /* V */ \
2368 __asm punpcklbw xmm0, xmm1 /* UV */ \
2377 __asm movd xmm1, [esi + edi] /* V */ \
2379 __asm punpcklbw xmm0, xmm1 /* UV */ \
2389 __asm movd xmm1, [esi + edi] /* V */ \
2391 __asm punpcklbw xmm0, xmm1 /* UV */ \
2440 __asm movdqa xmm1, xmm0 \
2444 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \
2445 __asm psubw xmm0, xmm1 \
2446 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \
2448 __asm psubw xmm1, xmm2 \
2454 __asm paddsw xmm1, xmm4 /* G += Y */ \
2457 __asm psraw xmm1, 6 \
2460 __asm packuswb xmm1, xmm1 /* G */ \
2467 __asm punpcklbw xmm0, xmm1 /* BG */ \
2469 __asm movdqa xmm1, xmm0 \
2471 __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \
2473 __asm movdqu 16[edx], xmm1 \
2480 __asm punpcklbw xmm1, xmm0 /* GB */ \
2483 __asm punpcklwd xmm5, xmm1 /* BGRA first 4 pixels */ \
2484 __asm punpckhwd xmm0, xmm1 /* BGRA next 4 pixels */ \
2493 __asm punpcklbw xmm1, xmm2 /* GR */ \
2496 __asm punpcklwd xmm5, xmm1 /* RGBA first 4 pixels */ \
2497 __asm punpckhwd xmm0, xmm1 /* RGBA next 4 pixels */ \
2505 __asm punpcklbw xmm0, xmm1 /* BG */ \
2507 __asm movdqa xmm1, xmm0 \
2509 __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB24 */ \
2511 __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \
2512 __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \
2514 __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \
2520 __asm punpcklbw xmm0, xmm1 /* BG */ \
2522 __asm movdqa xmm1, xmm0 \
2524 __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB565 */ \
2536 __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \
2537 __asm movdqa xmm2, xmm1 /* G */ \
2538 __asm pslld xmm1, 8 /* R */ \
2541 __asm psrad xmm1, 16 /* R */ \
2544 __asm pand xmm1, xmm7 /* R */ \
2546 __asm por xmm1, xmm3 /* BGR */ \
2547 __asm packssdw xmm0, xmm1 \
2932 movdqa xmm1, xmm0
2934 punpckhwd xmm1, xmm1 // BGRA next 4 pixels
2936 por xmm1, xmm4
2938 movdqu [edx + 16], xmm1
3061 movdqa xmm1, xmmword ptr kShuffleMirrorUV
3068 pshufb xmm0, xmm1
3146 movdqu xmm1, [eax + 16]
3149 movdqa xmm3, xmm1
3151 pand xmm1, xmm5
3152 packuswb xmm0, xmm1
3224 movdqu xmm1, [eax + edx] // and 16 V's
3227 punpcklbw xmm0, xmm1 // first 8 UV pairs
3228 punpckhbw xmm2, xmm1 // next 8 UV pairs
3289 movdqa xmm1, [eax + 16]
3292 movdqa [edx + 16], xmm1
3300 movdqu xmm1, [eax + 16]
3303 movdqu [edx + 16], xmm1
3362 pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
3363 psrld xmm1, 8
3373 pand xmm4, xmm1
3374 pand xmm5, xmm1
3430 movdqu xmm1, [eax + 16]
3433 psrld xmm1, 24
3434 packssdw xmm0, xmm1
3493 pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff
3494 psrld xmm1, 8
3506 pand xmm4, xmm1
3507 pand xmm5, xmm1
3837 movdqu xmm1, [eax + 16]
3840 pand xmm1, xmm5
3841 packuswb xmm0, xmm1
3869 movdqu xmm1, [eax + 16]
3874 pavgb xmm1, xmm3
3876 psrlw xmm1, 8
3877 packuswb xmm0, xmm1
3878 movdqa xmm1, xmm0
3881 psrlw xmm1, 8 // V
3882 packuswb xmm1, xmm1
3884 movq qword ptr [edx + edi], xmm1
3911 movdqu xmm1, [eax + 16]
3914 psrlw xmm1, 8
3915 packuswb xmm0, xmm1
3916 movdqa xmm1, xmm0
3919 psrlw xmm1, 8 // V
3920 packuswb xmm1, xmm1
3922 movq qword ptr [edx + edi], xmm1
3942 movdqu xmm1, [eax + 16]
3945 psrlw xmm1, 8
3946 packuswb xmm0, xmm1
3974 movdqu xmm1, [eax + 16]
3979 pavgb xmm1, xmm3
3981 pand xmm1, xmm5
3982 packuswb xmm0, xmm1
3983 movdqa xmm1, xmm0
3986 psrlw xmm1, 8 // V
3987 packuswb xmm1, xmm1
3989 movq qword ptr [edx + edi], xmm1
4016 movdqu xmm1, [eax + 16]
4019 pand xmm1, xmm5
4020 packuswb xmm0, xmm1
4021 movdqa xmm1, xmm0
4024 psrlw xmm1, 8 // V
4025 packuswb xmm1, xmm1
4027 movq qword ptr [edx + edi], xmm1
4075 movq xmm1, qword ptr [eax + esi] // src0
4077 punpcklbw xmm1, xmm2
4078 psubb xmm1, xmm6 // bias src0/1 - 128
4079 pmaddubsw xmm0, xmm1
4197 movdqu xmm1, [esi] // _a_g
4199 psrlw xmm1, 8 // _a_g
4201 pmullw xmm1, xmm3 // _a_g * alpha
4204 pand xmm1, xmm5 // a_g_ convert to 8 bits again
4205 paddusb xmm0, xmm1 // + src argb
4226 movd xmm1, [esi] // _a_g
4228 psrlw xmm1, 8 // _a_g
4230 pmullw xmm1, xmm3 // _a_g * alpha
4233 pand xmm1, xmm5 // a_g_ convert to 8 bits again
4234 paddusb xmm0, xmm1 // + src argb
4271 movdqu xmm1, [eax] // read 4 pixels
4272 punpcklbw xmm1, xmm1 // first 2 pixel rgbs
4273 pmulhuw xmm0, xmm1 // rgb * a
4274 movdqu xmm1, [eax] // read 4 pixels
4275 pshufb xmm1, xmm5 // isolate next 2 alphas
4278 pmulhuw xmm1, xmm2 // rgb * a
4283 psrlw xmm1, 8
4284 packuswb xmm0, xmm1
4363 movdqu xmm1, [eax] // read 4 pixels
4366 punpckhbw xmm1, xmm1 // next 2
4372 pmulhuw xmm1, xmm2 // rgb * a
4374 packuswb xmm0, xmm1
4449 vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a1]
4452 vpunpckldq xmm6, xmm0, xmm1 // [1,a1,1,a0]
4459 vmovd xmm1, dword ptr [ebx + edi * 4] // [1,a5]
4462 vpunpckldq xmm0, xmm0, xmm1 // [1,a5,1,a4]
4510 movdqu xmm1, [eax + 16]
4512 pmaddubsw xmm1, xmm4
4513 phaddw xmm0, xmm1
4527 movdqa xmm1, xmm0
4529 punpckhwd xmm1, xmm3 // GGGA next 4
4531 movdqu [edx + 16], xmm1
4572 movdqu xmm1, [eax + 16]
4574 pmaddubsw xmm1, xmm3
4575 phaddw xmm5, xmm1
4580 movdqu xmm1, [eax + 16]
4582 pmaddubsw xmm1, xmm4
4583 phaddw xmm5, xmm1
4587 movdqu xmm1, [eax + 16]
4589 psrld xmm1, 24
4590 packuswb xmm6, xmm1
4593 movdqa xmm1, xmm0 // Weave BG, RA together
4595 punpckhwd xmm1, xmm5 // BGRA next 4
4597 movdqu [eax + 16], xmm1
4632 movdqu xmm1, [eax + 16]
4634 pmaddubsw xmm1, xmm3
4636 phaddsw xmm6, xmm1 // G
4642 movdqu xmm1, [eax] // R
4644 pmaddubsw xmm1, xmm4
4646 phaddsw xmm1, xmm7 // R
4652 psraw xmm1, 6 // R
4654 packuswb xmm1, xmm1 // 8 R values
4656 punpcklbw xmm1, xmm6 // 8 RA values
4658 punpcklwd xmm0, xmm1 // BGRA first 4
4659 punpckhwd xmm6, xmm1 // BGRA next 4
4698 movdqu xmm1, [eax] // read 4 pixels
4699 punpckhbw xmm1, xmm5 // next 2 pixels
4700 pmulhuw xmm1, xmm2
4703 pmullw xmm1, xmm3
4706 paddw xmm1, xmm4
4707 packuswb xmm0, xmm1
4735 movdqa xmm1, xmm0
4737 punpckhbw xmm1, xmm1 // next 2
4739 pmulhuw xmm1, xmm2 // argb * value
4741 psrlw xmm1, 8
4742 packuswb xmm0, xmm1
4770 movdqu xmm1, xmm0
4773 punpckhbw xmm1, xmm1 // next 2
4777 pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2
4780 packuswb xmm0, xmm1
4812 movdqu xmm1, [esi] // read 4 pixels from src_argb1
4814 paddusb xmm0, xmm1 // src_argb0 + src_argb1
4827 movd xmm1, [esi] // read 1 pixels from src_argb1
4829 paddusb xmm0, xmm1 // src_argb0 + src_argb1
4858 movdqu xmm1, [esi] // read 4 pixels from src_argb1
4860 psubusb xmm0, xmm1 // src_argb0 - src_argb1
4995 movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2]
4997 punpcklbw xmm1, xmm5
4998 psubw xmm0, xmm1
4999 movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
5001 punpcklbw xmm1, xmm5
5003 psubw xmm1, xmm2
5010 paddw xmm0, xmm1
5011 paddw xmm0, xmm1
5012 pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
5013 psubw xmm1, xmm0
5014 pmaxsw xmm0, xmm1
5049 movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0]
5051 punpcklbw xmm1, xmm5
5052 psubw xmm0, xmm1
5053 movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1]
5055 punpcklbw xmm1, xmm5
5057 psubw xmm1, xmm2
5064 paddw xmm0, xmm1
5065 paddw xmm0, xmm1
5066 pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw
5067 psubw xmm1, xmm0
5068 pmaxsw xmm0, xmm1
5103 movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
5105 paddusb xmm0, xmm1 // sobel = sobelx + sobely
5109 movdqa xmm1, xmm2 // GGGG
5110 punpcklwd xmm1, xmm2 // First 4
5112 por xmm1, xmm5 // GGGA
5119 movdqu [edx], xmm1
5149 movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
5151 paddusb xmm0, xmm1 // sobel = sobelx + sobely
5184 movdqu xmm1, [eax + esi] // read 16 pixels src_sobely
5187 paddusb xmm2, xmm1 // sobel = sobelx + sobely
5191 movdqa xmm4, xmm1 // YS
5193 punpckhbw xmm1, xmm2
5197 movdqa xmm7, xmm1 // YSXA
5199 punpckhwd xmm1, xmm0 // Last 4
5203 movdqu [edx + 48], xmm1
5263 movdqu xmm1, [eax + 16]
5269 psubd xmm1, [eax + edx * 4 + 16]
5276 psubd xmm1, [esi + 16]
5282 paddd xmm1, [esi + edx * 4 + 16]
5287 packssdw xmm0, xmm1 // pack 4 pixels into 2 registers
5305 movdqu xmm1, [eax + 16]
5311 psubd xmm1, [eax + edx * 4 + 16]
5318 psubd xmm1, [esi + 16]
5324 paddd xmm1, [esi + edx * 4 + 16]
5330 cvtdq2ps xmm1, xmm1
5332 mulps xmm1, xmm4
5338 cvtps2dq xmm1, xmm1
5341 packssdw xmm0, xmm1
5388 pxor xmm1, xmm1
5401 punpcklbw xmm2, xmm1
5403 punpcklwd xmm2, xmm1
5404 punpckhwd xmm3, xmm1
5406 punpckhbw xmm4, xmm1
5408 punpcklwd xmm4, xmm1
5409 punpckhwd xmm5, xmm1
5445 punpcklbw xmm2, xmm1
5446 punpcklwd xmm2, xmm1
5499 cvttps2dq xmm1, xmm3 // x, y float to int next 2
5500 packssdw xmm0, xmm1 // x, y as 8 shorts
5506 movd xmm1, [eax + esi] // read pixel 0
5508 punpckldq xmm1, xmm6 // combine pixel 0 and 1
5510 movq qword ptr [edx], xmm1
5661 movdqu xmm1, xmm0
5663 punpckhbw xmm1, xmm2
5665 psubb xmm1, xmm4
5669 pmaddubsw xmm3, xmm1
5684 movdqu xmm1, [esi + edx]
5685 pavgb xmm0, xmm1
5721 movdqu xmm1, [eax + 16]
5724 pshufb xmm1, xmm5
5726 movdqu [edx + 16], xmm1
5810 movdqa xmm1, xmm0
5812 punpckhbw xmm1, xmm5
5815 pshufhw xmm1, xmm1, 01Bh
5816 pshuflw xmm1, xmm1, 01Bh
5817 packuswb xmm0, xmm1
5827 movdqa xmm1, xmm0
5829 punpckhbw xmm1, xmm5
5832 pshufhw xmm1, xmm1, 039h
5833 pshuflw xmm1, xmm1, 039h
5834 packuswb xmm0, xmm1
5844 movdqa xmm1, xmm0
5846 punpckhbw xmm1, xmm5
5849 pshufhw xmm1, xmm1, 093h
5850 pshuflw xmm1, xmm1, 093h
5851 packuswb xmm0, xmm1
5861 movdqa xmm1, xmm0
5863 punpckhbw xmm1, xmm5
5866 pshufhw xmm1, xmm1, 0C6h
5867 pshuflw xmm1, xmm1, 0C6h
5868 packuswb xmm0, xmm1
5909 movdqa xmm1, xmm0
5911 punpckhbw xmm1, xmm2
5913 movdqu [edi + 16], xmm1
5945 movdqa xmm1, xmm2
5947 punpcklbw xmm1, xmm0 // UYVY
5949 movdqu [edi], xmm1
5986 movdqa xmm1, xmm0 // X
5992 movdqa xmm2, xmm1
5994 mulps xmm2, xmm1 // X * X
5996 mulps xmm1, xmm2 // X * X * X
6000 mulps xmm1, [esi + 48] // C3 * X * X * X
6004 addps xmm0, xmm1 // result += C3 * X * X * X