1#include "crc32c.h"
2
/*
 * Fetch the 64-bit word at position IDX of lane LANE. A lane is a run of
 * 42 consecutive 64-bit words; the lanes sit back to back starting at the
 * byte pointer `data` that is visible where the macro is expanded.
 */
#define CRC32C_LANE_WORD(LANE, IDX) \
	(((const uint64_t *)data)[42 * (LANE) + (IDX)])

/*
 * Advance the three running values by one 64-bit word each: word ITR of
 * lane 1 goes into crc1, of lane 2 into crc2 and of lane 0 into crc0, in
 * that order, so crc0 is the last one touched. Expands to three complete
 * statements and needs `data`, `crc0`, `crc1` and `crc2` in scope.
 */
#define CRC32C3X8(ITR) \
	crc1 = __crc32cd(crc1, CRC32C_LANE_WORD(1, (ITR)));\
	crc2 = __crc32cd(crc2, CRC32C_LANE_WORD(2, (ITR)));\
	crc0 = __crc32cd(crc0, CRC32C_LANE_WORD(0, (ITR)));

/*
 * Seven back-to-back CRC32C3X8 steps covering words 7*ITR .. 7*ITR+6 of
 * every lane, wrapped so the whole group behaves as a single statement.
 */
#define CRC32C7X3X8(ITR) do {\
	CRC32C3X8(7 * (ITR) + 0) \
	CRC32C3X8(7 * (ITR) + 1) \
	CRC32C3X8(7 * (ITR) + 2) \
	CRC32C3X8(7 * (ITR) + 3) \
	CRC32C3X8(7 * (ITR) + 4) \
	CRC32C3X8(7 * (ITR) + 5) \
	CRC32C3X8(7 * (ITR) + 6) \
	} while(0)
17
/*
 * Fallback for build environments whose headers do not define
 * HWCAP_CRC32. The bit is tested against getauxval(AT_HWCAP) in
 * crc32c_arm64_probe().
 */
#ifndef HWCAP_CRC32
#define HWCAP_CRC32             (1 << 7)
#endif /* HWCAP_CRC32 */

/*
 * Set to 1 by crc32c_arm64_probe() when the running CPU reports the
 * hardware support that probe looks for. Nothing in this file writes it
 * when ARCH_HAVE_ARM64_CRC_CRYPTO is not defined, so it then stays 0.
 */
int crc32c_arm64_available = 0;
23
24#ifdef ARCH_HAVE_ARM64_CRC_CRYPTO
25
26#include <sys/auxv.h>
27#include <arm_acle.h>
28#include <arm_neon.h>
29
/*
 * Set to 1 after crc32c_arm64_probe() has run once, so later calls skip
 * the hardware capability query.
 */
static int crc32c_probed;
31
/*
 * Compute a CRC32C over `length` bytes at `data`, starting from an
 * all-ones seed, using the CRC32C instructions plus a polynomial
 * multiply (PMULL) to merge partial results.
 *
 * Input is consumed in 1024-byte blocks first. Within a block, one
 * leading 64-bit word is followed by three 42-word lanes that are
 * checksummed independently ("by 3") and one trailing 64-bit word; the
 * lane results are then folded together with the constants K1 and K2.
 * Whatever is left after the last full block is handled 8 bytes at a
 * time, followed by at most one 4-byte, one 2-byte and one 1-byte step.
 *
 * The running value is returned as is; no final inversion is applied.
 */
uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
{
	/* Merge multipliers: K1 is applied to crc0, K2 to crc1 */
	const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
	signed long remaining = length;
	uint32_t crc = ~0;

	for (; remaining >= 1024; remaining -= 1024) {
		uint32_t crc0, crc1, crc2;
		uint64_t t0, t1;

		/* Leading 8 bytes are taken here for better pipelining */
		crc0 = __crc32cd(crc, *(const uint64_t *)data);
		crc1 = 0;
		crc2 = 0;
		data += sizeof(uint64_t);

		/*
		 * 6 groups of 7 words = 42 words per lane, all inline.
		 * Each step updates crc0 last to avoid a dependency on
		 * the load above.
		 */
		CRC32C7X3X8(0);
		CRC32C7X3X8(1);
		CRC32C7X3X8(2);
		CRC32C7X3X8(3);
		CRC32C7X3X8(4);
		CRC32C7X3X8(5);

		data += 3 * 42 * sizeof(uint64_t);

		/*
		 * Fold crc0 and crc1 into crc2:
		 *   crc1 is multiplied by K2,
		 *   crc0 is multiplied by K1,
		 * while crc2 absorbs the trailing 8 bytes of the block.
		 */
		t1 = (uint64_t)vmull_p64(crc1, k2);
		t0 = (uint64_t)vmull_p64(crc0, k1);
		crc = __crc32cd(crc2, *(const uint64_t *)data);
		crc ^= __crc32cd(0, t1);
		crc ^= __crc32cd(0, t0);

		data += sizeof(uint64_t);
	}

	/* Input was an exact multiple of the block size: nothing left */
	if (remaining == 0)
		return crc;

	/* Whole 64-bit words of the remainder */
	for (; remaining >= (signed long)sizeof(uint64_t);
	     remaining -= (signed long)sizeof(uint64_t)) {
		crc = __crc32cd(crc, *(const uint64_t *)data);
		data += sizeof(uint64_t);
	}

	/*
	 * Fewer than 8 bytes remain; the low three bits of the count say
	 * which of the 4-, 2- and 1-byte pieces are present. This is more
	 * efficient than a straight byte loop.
	 */
	if (remaining & sizeof(uint32_t)) {
		crc = __crc32cw(crc, *(const uint32_t *)data);
		data += sizeof(uint32_t);
	}
	if (remaining & sizeof(uint16_t)) {
		crc = __crc32ch(crc, *(const uint16_t *)data);
		data += sizeof(uint16_t);
	}
	if (remaining & sizeof(uint8_t))
		crc = __crc32cb(crc, *(const uint8_t *)data);

	return crc;
}
102
103void crc32c_arm64_probe(void)
104{
105	unsigned long hwcap;
106
107	if (!crc32c_probed) {
108		hwcap = getauxval(AT_HWCAP);
109		if (hwcap & HWCAP_CRC32)
110			crc32c_arm64_available = 1;
111		crc32c_probed = 1;
112	}
113}
114
115#endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */
116