Replacing the x86 CRC32 Assembly Instruction
Symptom
Compilation fails with one of the following errors: "unknown mnemonic 'crc32q' -- 'crc32q (x3),x2'", "operand 1 should be an integer register -- 'crc32b (x1),x0'", or "unrecognized command line option '-msse4.2'".
Possible Cause
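The code uses the x86 crc32b and crc32q assembly instructions to calculate the CRC32C value. These instructions do not exist on the Arm64 platform, which instead provides four assembly instructions for the same purpose: crc32cb, crc32ch, crc32cw, and crc32cx. The -msse4.2 compilation option is likewise specific to x86 and is rejected by Arm64 compilers.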
Solution
Replace the x86 CRC32 assembly instructions with the Arm64 instructions crc32cb, crc32ch, crc32cw, and crc32cx, choosing the instruction that matches the input data width as described in Table 1. Then add the -mcpu=generic+crc compilation option when compiling the code.
| Instruction | Input Data Bit Width (Unit: Bit) | Remarks |
|---|---|---|
| crc32cb | 8 | Applies to 8-bit input data and replaces the x86 crc32b assembly instruction. |
| crc32ch | 16 | Applies to 16-bit input data. |
| crc32cw | 32 | Applies to 32-bit input data. |
| crc32cx | 64 | Applies to 64-bit input data and replaces the x86 crc32q assembly instruction. |
Example:
- Code on x86:
#define CRC32CB(crc, value) __asm__("crc32b\t" "(%1), %0" : "=r"(crc) : "r"(value), "0"(crc))
#define CRC32CP(crc, value) __asm__("crc32q\t" "(%1), %0" : "=r"(crc) : "r"(value), "0"(crc))

uint32_t crc32c(uint32_t crc, void const *buf, unsigned int len)
{
    uint64_t crc0 = crc;
    unsigned char const *next = buf;
    unsigned char const * const end = next + (len - (len & 7));

    while (next < end) {
        CRC32CP(crc0, next);
        next += 8;
    }
    len &= 7;
    while (len) {
        CRC32CB(crc0, next);
        next++;
        len--;
    }
    return crc0;
}
- Code on Arm64:
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value)) #define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) #define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) #define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value)) uint32_t crc32c_arm64_le_hw(uint32_t crc, const uint8_t *p, unsigned int len) { int64_t length = len; while ((length -= sizeof(uint64_t)) >= 0) { CRC32CX(crc, *((uint64_t *)p)); p += sizeof(uint64_t); } if (length & sizeof(uint32_t)) { CRC32CW(crc, *((uint32_t *)p)); p += sizeof(uint32_t); } if (length & sizeof(uint16_t)) { CRC32CH(crc, *((uint16_t *)p)); p += sizeof(uint16_t); } if (length & sizeof(uint8_t)) { CRC32CB(crc, *p); } return crc; }