#include <inttypes.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "crc32c.h"

/*
 * Based on a posting to lkml by Austin Zhang <austin.zhang@intel.com>
 *
 * Uses the hardware-provided CRC32 instruction to accelerate CRC32C
 * calculation.
 * CRC32C polynomial: 0x1EDC6F41 (BE) / 0x82F63B78 (LE)
 * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
 * http://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2A: Instruction Set Reference, A-M
 */

/*
 * Non-zero when crc32c_intel_probe() found the CPUID feature bit it tests.
 * Defined outside the ARCH_HAVE_SSE4_2 guard, so it exists (and stays 0)
 * on builds where the hardware path is compiled out.
 */
int crc32c_intel_available = 0;

232f68124f26e54233db41b462a257dabc48e5c32bJens Axboe#ifdef ARCH_HAVE_SSE4_2
24419484b934222e589f0b718e9494b045df176167Aaron Carroll
253845591fadea480177223e28c9d1c03642d34f0eJens Axboe#if BITS_PER_LONG == 64
263845591fadea480177223e28c9d1c03642d34f0eJens Axboe#define REX_PRE "0x48, "
273845591fadea480177223e28c9d1c03642d34f0eJens Axboe#define SCALE_F 8
283845591fadea480177223e28c9d1c03642d34f0eJens Axboe#else
293845591fadea480177223e28c9d1c03642d34f0eJens Axboe#define REX_PRE
303845591fadea480177223e28c9d1c03642d34f0eJens Axboe#define SCALE_F 4
313845591fadea480177223e28c9d1c03642d34f0eJens Axboe#endif
323845591fadea480177223e28c9d1c03642d34f0eJens Axboe
33e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboestatic int crc32c_probed;
34e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboe
/*
 * Fold 'length' bytes starting at 'data' into 'crc', one byte per CRC32
 * instruction, and return the updated value. No initial or final inversion
 * is applied here; a zero 'length' returns 'crc' unchanged without touching
 * 'data'.
 */
static uint32_t crc32c_intel_le_hw_byte(uint32_t crc, unsigned char const *data,
					unsigned long length)
{
	for (; length; length--, data++) {
		/*
		 * Hand-encoded "crc32 %cl, %esi" (F2 0F 38 F0, ModRM 0xf1).
		 * Because the registers are baked into the opcode bytes, the
		 * constraints pin crc to ESI (input and output) and the data
		 * byte to CL.
		 */
		__asm__ __volatile__(
			".byte 0xf2, 0xf, 0x38, 0xf0, 0xf1"
			:"=S"(crc)
			:"0"(crc), "c"(*data)
		);
	}

	return crc;
}

503845591fadea480177223e28c9d1c03642d34f0eJens Axboe/*
513845591fadea480177223e28c9d1c03642d34f0eJens Axboe * Steps through buffer one byte at at time, calculates reflected
523845591fadea480177223e28c9d1c03642d34f0eJens Axboe * crc using table.
533845591fadea480177223e28c9d1c03642d34f0eJens Axboe */
543845591fadea480177223e28c9d1c03642d34f0eJens Axboeuint32_t crc32c_intel(unsigned char const *data, unsigned long length)
553845591fadea480177223e28c9d1c03642d34f0eJens Axboe{
563845591fadea480177223e28c9d1c03642d34f0eJens Axboe	unsigned int iquotient = length / SCALE_F;
573845591fadea480177223e28c9d1c03642d34f0eJens Axboe	unsigned int iremainder = length % SCALE_F;
583845591fadea480177223e28c9d1c03642d34f0eJens Axboe#if BITS_PER_LONG == 64
593845591fadea480177223e28c9d1c03642d34f0eJens Axboe	uint64_t *ptmp = (uint64_t *) data;
603845591fadea480177223e28c9d1c03642d34f0eJens Axboe#else
613845591fadea480177223e28c9d1c03642d34f0eJens Axboe	uint32_t *ptmp = (uint32_t *) data;
623845591fadea480177223e28c9d1c03642d34f0eJens Axboe#endif
633845591fadea480177223e28c9d1c03642d34f0eJens Axboe	uint32_t crc = ~0;
643845591fadea480177223e28c9d1c03642d34f0eJens Axboe
653845591fadea480177223e28c9d1c03642d34f0eJens Axboe	while (iquotient--) {
663845591fadea480177223e28c9d1c03642d34f0eJens Axboe		__asm__ __volatile__(
673845591fadea480177223e28c9d1c03642d34f0eJens Axboe			".byte 0xf2, " REX_PRE "0xf, 0x38, 0xf1, 0xf1;"
683845591fadea480177223e28c9d1c03642d34f0eJens Axboe			:"=S"(crc)
693845591fadea480177223e28c9d1c03642d34f0eJens Axboe			:"0"(crc), "c"(*ptmp)
703845591fadea480177223e28c9d1c03642d34f0eJens Axboe		);
713845591fadea480177223e28c9d1c03642d34f0eJens Axboe		ptmp++;
723845591fadea480177223e28c9d1c03642d34f0eJens Axboe	}
733845591fadea480177223e28c9d1c03642d34f0eJens Axboe
743845591fadea480177223e28c9d1c03642d34f0eJens Axboe	if (iremainder)
753845591fadea480177223e28c9d1c03642d34f0eJens Axboe		crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
763845591fadea480177223e28c9d1c03642d34f0eJens Axboe				 iremainder);
773845591fadea480177223e28c9d1c03642d34f0eJens Axboe
783845591fadea480177223e28c9d1c03642d34f0eJens Axboe	return crc;
793845591fadea480177223e28c9d1c03642d34f0eJens Axboe}
80419484b934222e589f0b718e9494b045df176167Aaron Carroll
81e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboevoid crc32c_intel_probe(void)
825d7c5d348d66794fd5e8aa3090f582358e6c5017Jens Axboe{
83e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboe	if (!crc32c_probed) {
84267339ff794d41af0f3714483c7ab75985d8a85fJens Axboe		unsigned int eax, ebx, ecx = 0, edx;
85419484b934222e589f0b718e9494b045df176167Aaron Carroll
86e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboe		eax = 1;
87e0ab5f977075ec2f8ad42378c95eb800a611f0efJens Axboe
88e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboe		do_cpuid(&eax, &ebx, &ecx, &edx);
89e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboe		crc32c_intel_available = (ecx & (1 << 20)) != 0;
90e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboe		crc32c_probed = 1;
91e3aaafc4e902a0f78b8ee4d4bc85f8edac6e09d2Jens Axboe	}
925d7c5d348d66794fd5e8aa3090f582358e6c5017Jens Axboe}
935d7c5d348d66794fd5e8aa3090f582358e6c5017Jens Axboe
#endif /* ARCH_HAVE_SSE4_2 */