1/*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkTypes.h"
9#include <arm_neon.h>
10
// Fills dst[0 .. count) with the 32-bit pattern `value` using NEON stores.
// The work is done in shrinking chunks: 16 elements, then 4, then 2, then 1.
// A count that is zero or negative stores nothing.
void sk_memset32_neon(uint32_t dst[], uint32_t value, int count) {
    const uint32x4_t   quad = vdupq_n_u32(value);            // value in all four 32-bit lanes
    const uint32x4x4_t bulk = { quad, quad, quad, quad };    // sixteen copies of value

    // Bulk phase, sixteen elements per store.  vst4q swizzles its four source
    // registers, which is harmless here because every lane holds value.
    for (; count >= 16; count -= 16, dst += 16) {
        vst4q_u32(dst, bulk);
    }
    SkASSERT(count < 16);

    // At most three whole 4-element vectors remain.
    for (int quads = count / 4; quads > 0; --quads) {
        vst1q_u32(dst, quad);
        dst   += 4;
        count -= 4;
    }
    SkASSERT(count < 4);

    // One 2-element half vector, if there is room for it.
    if (count >= 2) {
        vst1_u32(dst, vget_low_u32(quad));
        dst   += 2;
        count -= 2;
    }
    SkASSERT(count < 2);

    // Final odd element.
    if (count == 1) {
        *dst = value;
    }
}
37
// Fills dst[0 .. count) with the 16-bit pattern `value` using NEON stores.
// The work is done in shrinking chunks: 32 elements, then 8, then 4, then
// single elements.  A count that is zero or negative stores nothing.
void sk_memset16_neon(uint16_t dst[], uint16_t value, int count) {
    const uint16x8_t   octet = vdupq_n_u16(value);              // value in all eight 16-bit lanes
    const uint16x8x4_t bulk  = { octet, octet, octet, octet };  // thirty-two copies of value

    // Bulk phase, thirty-two elements per store.  vst4q swizzles its four
    // source registers, which is harmless here because every lane holds value.
    for (; count >= 32; count -= 32, dst += 32) {
        vst4q_u16(dst, bulk);
    }
    SkASSERT(count < 32);

    // At most three whole 8-element vectors remain.
    for (int octets = count / 8; octets > 0; --octets) {
        vst1q_u16(dst, octet);
        dst   += 8;
        count -= 8;
    }
    SkASSERT(count < 8);

    // One 4-element half vector, if there is room for it.
    if (count >= 4) {
        vst1_u16(dst, vget_low_u16(octet));
        dst   += 4;
        count -= 4;
    }
    SkASSERT(count < 4);

    // Zero to three trailing elements, written one at a time.
    for (int i = 0; i < count; ++i) {
        dst[i] = value;
    }
}
66
67