16c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org/* 26c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 36c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org * 46c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org * Use of this source code is governed by a BSD-style license 56c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org * that can be found in the LICENSE file in the root of the source 66c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org * tree. An additional intellectual property rights grant can be found 76c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org * in the file PATENTS. All contributing project authors may 86c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org * be found in the AUTHORS file in the root of the source tree. 96c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org */ 106c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 116c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#include "webrtc/modules/desktop_capture/differ_block_sse2.h" 126c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 136c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#if defined(_MSC_VER) 146c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#include <intrin.h> 156c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#else 166c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#include <mmintrin.h> 176c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#include <emmintrin.h> 186c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#endif 196c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 206c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org#include "webrtc/modules/desktop_capture/differ_block.h" 216c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 226c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.orgnamespace webrtc { 236c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 246c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.orgextern int BlockDifference_SSE2_W16(const uint8_t* image1, 256c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org const uint8_t* image2, 266c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org int stride) { 276c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i acc = _mm_setzero_si128(); 286c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i v0; 296c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i v1; 306c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i sad; 316c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org for (int y = 0; y < kBlockSize; ++y) { 326c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); 336c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); 346c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1); 356c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2); 366c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 376c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 386c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 1); 396c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 1); 406c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 416c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 426c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 2); 436c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 2); 446c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 456c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 466c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 3); 476c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 3); 486c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 496c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 506c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 516c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org // This essential means sad = acc >> 64. We only care about the lower 16 526c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org // bits. 536c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_shuffle_epi32(acc, 0xEE); 546c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_adds_epu16(sad, acc); 556c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org int diff = _mm_cvtsi128_si32(sad); 566c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org if (diff) 576c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org return 1; 586c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org image1 += stride; 596c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org image2 += stride; 606c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org } 616c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org return 0; 626c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org} 636c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 646c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.orgextern int BlockDifference_SSE2_W32(const uint8_t* image1, 656c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org const uint8_t* image2, 666c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org int stride) { 676c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i acc = _mm_setzero_si128(); 686c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i v0; 696c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i v1; 706c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org __m128i sad; 716c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org for (int y = 0; y < kBlockSize; ++y) { 726c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org const __m128i* i1 = reinterpret_cast<const __m128i*>(image1); 736c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org const __m128i* i2 = reinterpret_cast<const __m128i*>(image2); 746c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1); 756c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2); 766c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 776c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 786c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 1); 796c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 1); 806c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 816c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 826c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 2); 836c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 2); 846c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 856c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 866c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 3); 876c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 3); 886c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 896c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 906c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 4); 916c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 4); 926c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 936c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 946c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 5); 956c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 5); 966c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 976c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 986c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 6); 996c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 6); 1006c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 1016c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 1026c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v0 = _mm_loadu_si128(i1 + 7); 1036c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org v1 = _mm_loadu_si128(i2 + 7); 1046c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_sad_epu8(v0, v1); 1056c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org acc = _mm_adds_epu16(acc, sad); 1066c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 1076c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org // This essential means sad = acc >> 64. We only care about the lower 16 1086c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org // bits. 1096c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_shuffle_epi32(acc, 0xEE); 1106c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org sad = _mm_adds_epu16(sad, acc); 1116c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org int diff = _mm_cvtsi128_si32(sad); 1126c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org if (diff) 1136c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org return 1; 1146c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org image1 += stride; 1156c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org image2 += stride; 1166c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org } 1176c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org return 0; 1186c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org} 1196c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org 1206c82a7ea6ea92e8c68b37112186fd928b11ddc49sergeyu@chromium.org} // namespace webrtc 121