代码参考了这篇文章:https://preshing.com/20120515/memory-reordering-caught-in-the-act/
这件事情之所以比较重要,是因为我们需要一个实验平台来验证自己的理论是否正确,尤其是在做性能优化(比如调整 memory barrier)的时候。如果性能提升上去了,但结果会出错,即便出错的概率比较小,我觉得这个性能提升也是不值得的。
这个小程序是这样的,三个线程:
- t0. 对 x=1, r0=y 进行操作
- t1. 对 y=1, r1=x 进行操作
- control. 控制t0, t1同时执行上面的操作
在原文里面是在Win32上使用信号量机制来做latch的,我这里改用了CAS。使用随机数是希望等待一段随机的时间,好让t0、t1尽可能同时开始操作,提高出现问题的几率。三个线程都是忙等,没有任何blocking的操作,所以会比较费CPU,使用率可能达到300%。
doris-sandbox04 :: ~ » g++ WatchMemoryOrdering.cpp -O2 -lpthread
doris-sandbox04 :: ~ » ./a.out
1 reorders detected after 366 iterations
2 reorders detected after 413 iterations
3 reorders detected after 2495 iterations
4 reorders detected after 4144 iterations
5 reorders detected after 4362 iterations
6 reorders detected after 5493 iterations
7 reorders detected after 5902 iterations
doris-sandbox04 :: ~ » g++ WatchMemoryOrdering.cpp -O2 -lpthread -DUSE_FENCE
doris-sandbox04 :: ~ » time ./a.out
^C
./a.out 12.31s user 3.66s system 295% cpu 5.408 total
这是一个很不错的实验。过去我一直以为复现这个问题比较困难,现在发现也不需要太多代码就可以做到,关键还是在设计精巧的实验上。
/* coding:utf-8
* Copyright (C) dirlt
*/
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <random>
#include <string>
#include <thread>
#include <vector>
// Experiment adapted from:
// https://preshing.com/20120515/memory-reordering-caught-in-the-act/
using namespace std;

// Handshake flags. control() stores 1 into t0/t1 to release a worker for one
// round; the worker consumes the flag (back to 0) via WAIT_AND_SET.
atomic<int> t0{0};
atomic<int> t1{0};
// Completion counter: each worker increments it once per round, and control()
// waits for it to reach 2 before resetting it to 0.
atomic<int> ctl{0};

// Values observed by the workers: thread0 writes r0 (its read of Y),
// thread1 writes r1 (its read of X).
int r0 = 0;
int r1 = 0;

// The two plain shared variables whose store/load ordering is being probed.
int X = 0;
int Y = 0;

// Modulus of the workers' random spin: a worker keeps drawing random numbers
// until one is divisible by this value.
constexpr int waiting = 20;
// WAIT_AND_SET(t, exp, act)
//   Busy-waits until the atomic `t` holds the value `exp`, then atomically
//   replaces it with `act` (a compare-and-swap latch).
//
//   t   - an atomic<int> lvalue
//   exp - value to wait for
//   act - value to store once `exp` is observed
//
// compare_exchange_strong overwrites its first argument with the current
// value when it fails, so the expected value is re-initialized on every
// iteration. All macro arguments are parenthesized so that arbitrary
// expressions can be passed safely.
#define WAIT_AND_SET(t, exp, act)                                             \
    do {                                                                      \
        for (;;) {                                                            \
            int wait_and_set_expected_ = (exp);                               \
            if ((t).compare_exchange_strong(wait_and_set_expected_, (act))) { \
                break;                                                        \
            }                                                                 \
        }                                                                     \
    } while (0)
// FENCE(): barrier placed between each worker's store and its following load.
//
//  - Built with -DUSE_FENCE: a sequentially consistent thread fence, which
//    orders the store before the load for both the compiler and the CPU.
//  - Built without it: a compiler-only barrier (std::atomic_signal_fence).
//    It stops the optimizer from swapping the store and the load itself, so
//    any reordering the program reports is attributable to the hardware
//    rather than to the compiler.
//
// Names are fully qualified so the macro does not depend on a using-directive
// being in effect at the expansion site.
#ifdef USE_FENCE
#define FENCE() std::atomic_thread_fence(std::memory_order_seq_cst)
#else
#define FENCE() std::atomic_signal_fence(std::memory_order_seq_cst)
#endif
// Worker 0. Each round: wait until control() raises t0, spin for a random
// number of iterations, store X = 1, execute FENCE(), load Y into r0, and
// finally bump `ctl` to tell control() this worker is done. Never returns.
void thread0() {
    // Thread-private generator with a fixed seed. rand() keeps hidden shared
    // state, and whether it may be called concurrently from several threads
    // is implementation-defined, so it is not used from the workers.
    std::minstd_rand rng(1);
    const auto spin_modulus =
        static_cast<std::minstd_rand::result_type>(waiting);
    for (;;) {
        // Consume the start signal (t0: 1 -> 0).
        WAIT_AND_SET(t0, 1, 0);
        // Random delay so the two workers' critical sections overlap in
        // varying ways from round to round.
        while ((rng() % spin_modulus) != 0) {
        }
        X = 1;
        FENCE();  // expansion depends on whether USE_FENCE is defined
        r0 = Y;
        ctl.fetch_add(1);
    }
}
// Worker 1. Each round: wait until control() raises t1, spin for a random
// number of iterations, store Y = 1, execute FENCE(), load X into r1, and
// finally bump `ctl` to tell control() this worker is done. Never returns.
void thread1() {
    // Thread-private generator with a fixed seed. rand() keeps hidden shared
    // state, and whether it may be called concurrently from several threads
    // is implementation-defined, so it is not used from the workers.
    std::minstd_rand rng(2);
    const auto spin_modulus =
        static_cast<std::minstd_rand::result_type>(waiting);
    for (;;) {
        // Consume the start signal (t1: 1 -> 0).
        WAIT_AND_SET(t1, 1, 0);
        // Random delay so the two workers' critical sections overlap in
        // varying ways from round to round.
        while ((rng() % spin_modulus) != 0) {
        }
        Y = 1;
        FENCE();  // expansion depends on whether USE_FENCE is defined
        r1 = X;
        ctl.fetch_add(1);
    }
}
// Controller. Runs rounds forever: clears X and Y, releases both workers by
// setting t0 and t1 to 1, waits until both have incremented `ctl` (and resets
// it to 0), then inspects what they observed. A round in which both r0 and r1
// are 0 is counted and reported. Never returns.
void control() {
    // 64-bit unsigned counters: the loop is unbounded, so an `int` iteration
    // count would eventually overflow (undefined behaviour for signed types).
    unsigned long long detected = 0;
    unsigned long long iterations = 0;
    for (;;) {
        // Reset the shared variables before the workers are released.
        X = 0;
        Y = 0;
        ++iterations;
        t0.store(1);
        t1.store(1);
        // Wait for both workers to finish this round.
        WAIT_AND_SET(ctl, 2, 0);
        if (r1 == 0 && r0 == 0) {
            ++detected;
            printf("%llu reorders detected after %llu iterations\n", detected,
                   iterations);
            // The program only ends by being killed, so flush each report
            // to keep it from being lost in a buffer when stdout is not a
            // terminal.
            fflush(stdout);
        }
    }
}
int main() {
// initialization.
r0 = 1;
r1 = 1;
t0 = 0;
t1 = 0;
ctl = 0;
X = 0;
Y = 0;
// start thread.
thread _t0(thread0);
thread _t1(thread1);
thread _ctl(control);
_t0.join();
_t1.join();
_ctl.join();
return 0;
}