选项 -farray-widen-compare

数组宽比较优化:支持使用宽数据类型对原数组指针(指向的数组元素为窄类型)解引用,达到一次比较多个元素的效果,从而提高程序性能。

针对如下代码场景:

/* Yields the smaller of x and y; an argument may be evaluated twice. */
#define my_min(x, y) ((x) < (y) ? (x) : (y))

/*
 * Byte-wise scan of a and b.
 *
 * Starts at min(n0, n1), pre-increments the index, and returns the first
 * index at which a and b differ, or limit if the index reaches limit first.
 * Because of the pre-increment, element min(n0, n1) itself is never compared.
 *
 * NOTE(review): the loop stops only on a mismatch or when n equals limit
 * exactly; callers presumably guarantee min(n0, n1) < limit -- confirm.
 */
uint32_t func (uint32_t n0, uint32_t n1, const uint32_t limit, const uint8_t * a, const uint8_t * b)
{
    uint32_t n = my_min(n0, n1);
    /* One narrow (uint8_t) element is compared per iteration. */
    while (++n != limit)
    if (a[n] != b[n])
        break;
    return n;
} 

添加选项后,可以优化为如下代码:

/* Yields the smaller of x and y; an argument may be evaluated twice. */
#define my_min(x, y) ((x) < (y) ? (x) : (y))

/*
 * Wide-compare form of the byte-wise scan of a and b.
 *
 * Starts one past min(n0, n1) and compares eight bytes per step through
 * uint64_t loads while a full word still fits below limit; the remaining
 * bytes are compared one at a time. Returns the first index at which a and b
 * differ, or limit if the index reaches limit first.
 */
uint32_t func (uint32_t n0, uint32_t n1, const uint32_t limit, const uint8_t * a, const uint8_t * b)
{
    uint32_t n = my_min(n0, n1);

    ++n;

    /* Wide phase: one uint64_t load per array covers eight elements. */
    while (n + sizeof(uint64_t) <= limit) {
        const uint64_t wide_a = *((const uint64_t *)(a + n));
        const uint64_t wide_b = *((const uint64_t *)(b + n));

        if (wide_a != wide_b) {
            /*
             * The lowest set bit of the XOR marks the first difference.
             * Dividing by 8 turns it into a byte offset; this is the first
             * mismatching element when the lowest-addressed byte is the
             * least significant one (little-endian layout).
             */
            int diff_bit = __builtin_ctzll(wide_a ^ wide_b);

            return n + diff_bit / 8;
        }
        n += sizeof(uint64_t);
    }

    /* Tail phase: fewer than eight elements remain before limit. */
    while (n != limit && a[n] == b[n])
        ++n;

    return n;
}

使用方法

增加编译选项 -O3 -farray-widen-compare。