Replace broken ColdMutex with dual-layer ShmSpinLock + std::mutex

The old ColdMutex (interprocess_mutex) was in process-local anonymous
namespace storage, so each process had its own copy — no actual
cross-process exclusion. Even if moved to SHM, interprocess_mutex is
not robust: a crash while holding the lock would deadlock on restart.

New design:
- ShmSpinLock: atomic<pid_t> in shared memory, kill(pid,0) detects
  dead owners (ESRCH → takeover), crash-safe by construction
- std::mutex: process-local, handles intra-process thread contention
  without burning CPU on the SHM spinlock
- DualLock: locks local first, then shm; unlocks in reverse

9 lock sites in MapRuleStat upgraded to std::lock_guard<DualLock>.
This commit is contained in:
Huamonarch 2026-05-13 10:21:55 +08:00
parent b8596d311f
commit 9bb810d9ed

View File

@ -5,7 +5,7 @@
* *
* display Memcached * display Memcached
* stat_values/running_time/shear_times * stat_values/running_time/shear_times
* 线程/进程安全: 使用 ShmSpinLock（kill(pid,0) 检测死亡持有者）+ std::mutex
* *
* @author your name (you@domain.com) * @author your name (you@domain.com)
* @version 0.2 * @version 0.2
@ -16,16 +16,73 @@
*/ */
#include <boost/interprocess/sync/interprocess_mutex.hpp> #include <boost/interprocess/sync/interprocess_mutex.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp> #include <boost/interprocess/sync/scoped_lock.hpp>
#include <atomic>
#include <cerrno>
#include <csignal>
#include <ctime>
#include <mutex>
#include <unistd.h>
#include "shm_header.h" #include "shm_header.h"
namespace RuleStatShm { namespace RuleStatShm {
using namespace ShmHeader; using namespace ShmHeader;
/// Cross-process spin lock kept in shared memory.
/// Crash-safe by construction: the lock word records the owner's pid, and a
/// contender probes the recorded owner with kill(pid, 0). If the owner has
/// died (ESRCH) the lock is taken over instead of deadlocking on restart.
/// NOTE(review): a recycled pid could defeat the liveness probe — assumed
/// acceptable because pid recycling is slow relative to lock hold times.
struct ShmSpinLock {
  // The lock word must be address-free lock-free; otherwise std::atomic
  // falls back to a process-local internal mutex and cross-process
  // exclusion silently breaks.
  static_assert(std::atomic<pid_t>::is_always_lock_free,
                "atomic<pid_t> must be lock-free for shared-memory use");

  std::atomic<pid_t> owner{0}; ///< 0 = free, otherwise pid of the holder

  ShmSpinLock() = default;
  ShmSpinLock(const ShmSpinLock &) = delete;
  ShmSpinLock &operator=(const ShmSpinLock &) = delete;

  /// Acquire the lock, taking over from a dead owner if necessary.
  void lock() {
    const pid_t my_pid = ::getpid();
    while (true) {
      pid_t expected = 0;
      if (owner.compare_exchange_strong(expected, my_pid,
                                        std::memory_order_acquire)) {
        return;
      }
      // Failed CAS loaded the current owner into `expected`. Probe it:
      // ESRCH means the holder is dead -> take over. EPERM means it is
      // alive but owned by another user, so it is NOT treated as dead.
      if (::kill(expected, 0) != 0 && errno == ESRCH) {
        if (owner.compare_exchange_strong(expected, my_pid,
                                          std::memory_order_acquire)) {
          return;
        }
        continue; // another contender raced the takeover; retry at once
      }
      // Holder is alive: back off briefly instead of burning CPU.
      struct timespec ts = {0, 100000}; // 100µs
      ::nanosleep(&ts, nullptr);
    }
  }

  /// Release the lock. The CAS makes this a no-op when the caller is not
  /// the current owner (defensive against mismatched unlock).
  void unlock() {
    pid_t expected = ::getpid();
    owner.compare_exchange_strong(expected, 0, std::memory_order_release);
  }
};

/// Two-level lock: a process-local std::mutex serializes threads inside one
/// process so that at most one thread per process contends on the
/// shared-memory spin lock. Lock order is local -> shm; unlock reverses it.
struct DualLock {
  std::mutex local;  ///< intra-process layer
  ShmSpinLock &shm;  ///< cross-process layer (lives in shared memory)

  explicit DualLock(ShmSpinLock &s) : shm(s) {}
  DualLock(const DualLock &) = delete;
  DualLock &operator=(const DualLock &) = delete;

  void lock() {
    local.lock();
    shm.lock();
  }

  void unlock() {
    shm.unlock();
    local.unlock();
  }
};
namespace {
const static std::string dir_path = "/users/dsc/shm"; const static std::string dir_path = "/users/dsc/shm";
const static std::string shm_file = "MapRuleStat"; ///<映射文件名 const static std::string shm_file = "MapRuleStat"; ///<映射文件名
@ -43,6 +100,9 @@ static void_allocator
static vec_allocator_s static vec_allocator_s
items_allocator(obj_mapped_file.get_segment_manager()); ///< vector_s分配器 items_allocator(obj_mapped_file.get_segment_manager()); ///< vector_s分配器
static ShmSpinLock *shm_lock =
obj_mapped_file.find_or_construct<ShmSpinLock>("RuleStatLock")();
thread_local static char_string tl_key_object("", default_allocator); thread_local static char_string tl_key_object("", default_allocator);
thread_local static char_string tl_key_delete("", default_allocator); thread_local static char_string tl_key_delete("", default_allocator);
@ -132,13 +192,14 @@ struct MapRuleStat {
private: private:
MapRuleStat_s *p_msg_map = obj_mapped_file.find_or_construct<MapRuleStat_s>( MapRuleStat_s *p_msg_map = obj_mapped_file.find_or_construct<MapRuleStat_s>(
shm_file.c_str())(less_s(), obj_mapped_file.get_segment_manager()); shm_file.c_str())(less_s(), obj_mapped_file.get_segment_manager());
DualLock lock_{*shm_lock};
public: public:
// ── mon 高频调用 ── // ── mon 高频调用 ──
bool add_stat_value(const std::string &key, const double &value) { bool add_stat_value(const std::string &key, const double &value) {
try { try {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
if (p_msg_map->operator[](get_thread_local_key(key)).fetch_mark) { if (p_msg_map->operator[](get_thread_local_key(key)).fetch_mark) {
p_msg_map->operator[](get_thread_local_key(key)).stat_values.clear(); p_msg_map->operator[](get_thread_local_key(key)).stat_values.clear();
p_msg_map->operator[](get_thread_local_key(key)).fetch_mark = false; p_msg_map->operator[](get_thread_local_key(key)).fetch_mark = false;
@ -156,7 +217,7 @@ public:
/// mon 写入累积的冷字段stat_values, running_time, shear_times /// mon 写入累积的冷字段stat_values, running_time, shear_times
bool update_cold_fields(const std::string &key, const RuleStatCold &value) { bool update_cold_fields(const std::string &key, const RuleStatCold &value) {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
try { try {
if (p_msg_map->find(get_thread_local_key(key)) != p_msg_map->end()) { if (p_msg_map->find(get_thread_local_key(key)) != p_msg_map->end()) {
p_msg_map->operator[](get_thread_local_key(key)).update_cold(value); p_msg_map->operator[](get_thread_local_key(key)).update_cold(value);
@ -171,7 +232,7 @@ public:
/// cron 写入静态字段running_time, alarm_times, last_alarm_time, dev_coder 等) /// cron 写入静态字段running_time, alarm_times, last_alarm_time, dev_coder 等)
bool update_static_fields(const std::string &key, const RuleStatCold &value) { bool update_static_fields(const std::string &key, const RuleStatCold &value) {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
try { try {
if (p_msg_map->find(get_thread_local_key(key)) != p_msg_map->end()) { if (p_msg_map->find(get_thread_local_key(key)) != p_msg_map->end()) {
p_msg_map->operator[](get_thread_local_key(key)).update_static(value); p_msg_map->operator[](get_thread_local_key(key)).update_static(value);
@ -188,7 +249,7 @@ public:
bool get_stat_value(const std::string &key, RuleStatCold &value) { bool get_stat_value(const std::string &key, RuleStatCold &value) {
try { try {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
if (!p_msg_map->operator[](get_thread_local_key(key)).fetch_mark && if (!p_msg_map->operator[](get_thread_local_key(key)).fetch_mark &&
p_msg_map->operator[](get_thread_local_key(key)).stat_values.size() > p_msg_map->operator[](get_thread_local_key(key)).stat_values.size() >
stat_size_min) { stat_size_min) {
@ -206,7 +267,7 @@ public:
// ── 管理操作 ── // ── 管理操作 ──
bool delete_data(const std::string &key) { bool delete_data(const std::string &key) {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
try { try {
if (p_msg_map->find(get_thread_local_delete_key(key)) != if (p_msg_map->find(get_thread_local_delete_key(key)) !=
p_msg_map->end()) { p_msg_map->end()) {
@ -219,18 +280,18 @@ public:
} }
size_t size() { size_t size() {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
return p_msg_map->size(); return p_msg_map->size();
} }
bool empty() { bool empty() {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
return p_msg_map->empty(); return p_msg_map->empty();
} }
std::vector<std::string> std::vector<std::string>
find_rule_id(const std::vector<std::string> &ruleid) { find_rule_id(const std::vector<std::string> &ruleid) {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
std::vector<std::string> res; std::vector<std::string> res;
if (p_msg_map->empty()) { if (p_msg_map->empty()) {
return {}; return {};
@ -246,7 +307,7 @@ public:
std::vector<std::string> std::vector<std::string>
find_no_rule_id(const std::vector<std::string> &ruleid) { find_no_rule_id(const std::vector<std::string> &ruleid) {
bipc::scoped_lock<ColdMutex> guard(llmtx); std::lock_guard<DualLock> guard(lock_);
std::vector<std::string> res; std::vector<std::string> res;
if (p_msg_map->empty()) { if (p_msg_map->empty()) {
return {}; return {};