std::hardware_destructive_interference_size, std::hardware_constructive_interference_size
From cppreference.net
C++
Concurrency support library
|
|
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
定义于头文件
<new>
|
||
|
inline constexpr std::size_t hardware_destructive_interference_size = /*由实现定义*/; | (1) | (C++17 起) |
|
inline constexpr std::size_t hardware_constructive_interference_size = /*由实现定义*/; | (2) | (C++17 起) |
1)
避免伪共享的两个对象间最小偏移量。保证至少为 alignof(std::max_align_t)。
// Two independently updated atomics that should not end up close enough
// together to interfere with each other.
struct keep_apart
{
    // Each member is aligned to hardware_destructive_interference_size, so
    // the offsets of `cat` and `dog` differ by at least that many bytes.
    alignas(std::hardware_destructive_interference_size) std::atomic<int> cat;
    alignas(std::hardware_destructive_interference_size) std::atomic<int> dog;
};
2)
促进真共享的连续内存最大尺寸。保证至少为 alignof(std::max_align_t)。
struct together { std::atomic<int> dog; int puppy; }; struct kennel { // 其他数据成员... alignas(sizeof(together)) together pack; // 其他数据成员... }; static_assert(sizeof(together) <= std::hardware_constructive_interference_size);
注释
这些常量提供了一种可移植的方式来访问L1数据缓存行大小。
| 功能测试宏 | 值 | 标准 | 功能 |
|---|---|---|---|
| __cpp_lib_hardware_interference_size | 201703L | (C++17) | constexpr std::hardware_constructive_interference_size 与 constexpr std::hardware_destructive_interference_size |
示例
该程序使用两个线程以原子方式写入给定全局对象的数据成员。第一个对象恰好容纳在一个缓存行中,这会导致"硬件干扰"。第二个对象将其数据成员分别保存在不同的缓存行上,从而避免了线程写入后可能发生的"缓存同步"。
运行此代码
// Demonstration program: two threads atomically increment the two data
// members of a global object. With OneCacheLiner both members share one
// aligned block; with TwoCacheLiner each member has its own
// destructive-interference-sized alignment. The per-thread timings of both
// layouts are printed and compared.

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <new>
#include <thread>

#ifdef __cpp_lib_hardware_interference_size
    using std::hardware_constructive_interference_size;
    using std::hardware_destructive_interference_size;
#else
    // 64 bytes on x86-64 │ L1_CACHE_BYTES │ L1_CACHE_SHIFT │ __cacheline_aligned │ ...
    constexpr std::size_t hardware_constructive_interference_size = 64;
    constexpr std::size_t hardware_destructive_interference_size = 64;
#endif

// Serializes access to std::cout between the worker threads.
std::mutex cout_mutex;

constexpr int max_write_iterations{10'000'000}; // tune for benchmark duration

// Both counters live in one block aligned to the constructive size.
struct alignas(hardware_constructive_interference_size)
OneCacheLiner // occupies one cache line
{
    std::atomic_uint64_t x{};
    std::atomic_uint64_t y{};
}
oneCacheLiner;

// Each counter gets its own destructive-size alignment.
struct TwoCacheLiner // occupies two cache lines
{
    alignas(hardware_destructive_interference_size) std::atomic_uint64_t x{};
    alignas(hardware_destructive_interference_size) std::atomic_uint64_t y{};
}
twoCacheLiner;

// Returns the current time point of the high-resolution clock.
inline auto now() noexcept
{
    return std::chrono::high_resolution_clock::now();
}

// Worker for the OneCacheLiner layout.
// xy == true increments member x, xy == false increments member y.
template<bool xy>
void oneCacheLinerThread()
{
    const auto start{now()};

    for (std::uint64_t count{}; count != max_write_iterations; ++count)
        if constexpr (xy)
            oneCacheLiner.x.fetch_add(1, std::memory_order_relaxed);
        else
            oneCacheLiner.y.fetch_add(1, std::memory_order_relaxed);

    const std::chrono::duration<double, std::milli> elapsed{now() - start};
    std::lock_guard lk{cout_mutex};
    std::cout << "oneCacheLinerThread() 耗时 "
              << elapsed.count() << " 毫秒\n";

    // The counter is overwritten with the elapsed milliseconds so that
    // main() can read the timing back from the same object.
    if constexpr (xy)
        oneCacheLiner.x = elapsed.count();
    else
        oneCacheLiner.y = elapsed.count();
}

// Worker for the TwoCacheLiner layout.
// xy == true increments member x, xy == false increments member y.
template<bool xy>
void twoCacheLinerThread()
{
    const auto start{now()};

    for (std::uint64_t count{}; count != max_write_iterations; ++count)
        if constexpr (xy)
            twoCacheLiner.x.fetch_add(1, std::memory_order_relaxed);
        else
            twoCacheLiner.y.fetch_add(1, std::memory_order_relaxed);

    const std::chrono::duration<double, std::milli> elapsed{now() - start};
    std::lock_guard lk{cout_mutex};
    std::cout << "twoCacheLinerThread() 耗时 "
              << elapsed.count() << " 毫秒\n";

    // The counter is overwritten with the elapsed milliseconds so that
    // main() can read the timing back from the same object.
    if constexpr (xy)
        twoCacheLiner.x = elapsed.count();
    else
        twoCacheLiner.y = elapsed.count();
}

int main()
{
    // Report whether the library advertises the interference-size constants.
    std::cout << "__cpp_lib_hardware_interference_size "
#   ifdef __cpp_lib_hardware_interference_size
                 "= " << __cpp_lib_hardware_interference_size << '\n';
#   else
                 "is not defined, use "
              << hardware_destructive_interference_size << " 作为备用\n";
#   endif

    std::cout << "hardware_destructive_interference_size == "
              << hardware_destructive_interference_size << '\n'
              << "hardware_constructive_interference_size == "
              << hardware_constructive_interference_size << "\n\n"
              << std::fixed << std::setprecision(2)
              << "sizeof( OneCacheLiner ) == " << sizeof(OneCacheLiner) << '\n'
              << "sizeof( TwoCacheLiner ) == " << sizeof(TwoCacheLiner) << "\n\n";

    constexpr int max_runs{4};

    // Accumulates the milliseconds both threads stored into x and y.
    int oneCacheLiner_average{0};
    for (auto i{0}; i != max_runs; ++i)
    {
        std::thread th1{oneCacheLinerThread<0>};
        std::thread th2{oneCacheLinerThread<1>};
        th1.join();
        th2.join();
        oneCacheLiner_average += oneCacheLiner.x + oneCacheLiner.y;
    }
    // Two timings per run, hence the extra division by 2.
    std::cout << "平均 T1 时间: "
              << (oneCacheLiner_average / max_runs / 2) << " 毫秒\n\n";

    int twoCacheLiner_average{0};
    for (auto i{0}; i != max_runs; ++i)
    {
        std::thread th1{twoCacheLinerThread<0>};
        std::thread th2{twoCacheLinerThread<1>};
        th1.join();
        th2.join();
        twoCacheLiner_average += twoCacheLiner.x + twoCacheLiner.y;
    }
    std::cout << "平均 T2 时间: "
              << (twoCacheLiner_average / max_runs / 2) << " 毫秒\n\n"
              << "比率 T1/T2:~ "
              << 1.0 * oneCacheLiner_average / twoCacheLiner_average << '\n';
}
可能的输出:
__cpp_lib_hardware_interference_size = 201703
hardware_destructive_interference_size == 64
hardware_constructive_interference_size == 64

sizeof( OneCacheLiner ) == 64
sizeof( TwoCacheLiner ) == 128

oneCacheLinerThread() 耗时 517.83 毫秒
oneCacheLinerThread() 耗时 533.43 毫秒
oneCacheLinerThread() 耗时 527.36 毫秒
oneCacheLinerThread() 耗时 555.69 毫秒
oneCacheLinerThread() 耗时 574.74 毫秒
oneCacheLinerThread() 耗时 591.66 毫秒
oneCacheLinerThread() 耗时 555.63 毫秒
oneCacheLinerThread() 耗时 555.76 毫秒
平均 T1 时间: 550 毫秒

twoCacheLinerThread() 耗时 89.79 毫秒
twoCacheLinerThread() 耗时 89.94 毫秒
twoCacheLinerThread() 耗时 89.46 毫秒
twoCacheLinerThread() 耗时 90.28 毫秒
twoCacheLinerThread() 耗时 89.73 毫秒
twoCacheLinerThread() 耗时 91.11 毫秒
twoCacheLinerThread() 耗时 89.17 毫秒
twoCacheLinerThread() 耗时 90.09 毫秒
平均 T2 时间: 89 毫秒

比率 T1/T2:~ 6.16
另请参阅
| hardware_concurrency [static] | 返回实现支持的并发线程数量 (std::thread 的公开静态成员函数) |
| hardware_concurrency [static] | 返回实现支持的并发线程数量 (std::jthread 的公开静态成员函数) |