Rate This Document
Findability
Accuracy
Completeness
Readability

Replacing the popcntq Assembly Instruction

The POPCNT function counts the number of bits that are set to 1 in a 64-bit integer.

  • Code on x86:
    /*
     * Count the bits set to 1 in a 64-bit value with the popcntq instruction.
     *
     * a: value whose set bits are counted.
     * Returns the number of 1 bits in a.
     */
    static inline int64_t POPCNT_popcnt_u64(uint64_t a)
    {
        int64_t bit_count;

        /*
         * The input may be supplied from a register or from memory ("mr");
         * the result is written to a register, and the condition codes are
         * declared as clobbered.
         */
        __asm__("popcntq %[src], %[dst]"
                : [dst] "=r"(bit_count)
                : [src] "mr"(a)
                : "cc");

        return bit_count;
    }
  • Alternative for Kunpeng processors:
    #include <arm_neon.h>
    /*
     * Count the bits set to 1 in a 64-bit value using NEON intrinsics.
     *
     * x: value whose set bits are counted.
     * Returns the number of 1 bits in x (0 to 64).
     */
    static inline int POPCNT_popcnt_u64(uint64_t x)
    {
        /* View the 64-bit input as eight bytes and count the set bits in each. */
        uint8x8_t per_byte = vcnt_u8(vcreate_u8(x));

        /*
         * Pairwise widening adds fold the eight per-byte counts into one total:
         * 8 x u8 -> 4 x u16 -> 2 x u32 -> 1 x u64.
         */
        uint64x1_t total = vpaddl_u32(vpaddl_u16(vpaddl_u8(per_byte)));

        /* The total never exceeds 64, so narrowing to int loses nothing. */
        return (int)vget_lane_u64(total, 0);
    }