Branchless Programming in C++

17 Mar 2022

[ c++  performance  development  ]

Profiling

Summary

References

Code spippets

static void BM_branch_predicted(benchmark::State& state) {
  srand(1);
  const unsigned int N = state.range(0);
  std::vector<unsigned long> v1(N), v2(N);
  std::vector<int> c1(N);
  for(size_t i = 0; i < N; i++) {
    v1[i] = rand();
    v2[i] = rand();
    c1[i] = rand()>=0;
  }
  unsigned long* p1 = v1.data();
  unsigned long* p2 = v2.data();
  int* b1 = c1.data();
  for(auto _ : state) {
    unsigned long a1 = 0, a2 = 0;
    for(size_t i = 0; i < N; i++) {
      if(b1[i]) {
        a1 += p1[i];
      } else {
        a2 *= p2[i];
      }
    }
    benchmark::DoNotOptimize(a1);
    benchmark::DoNotOptimize(a2);
    benchmark::ClobberMemory();
  }
  state.SetItemsProcessed(N*state.iterations());
}
// Register the function as a benchmark
BENCHMARK(BM_branch_predicted)->Arg(1<<22);