检测和解决多线程应用程序中死锁的方法






4.63/5 (5 票)
2001年5月27日
3分钟阅读

91404

1188
介绍了一种使用临界区作为同步对象来检测和解决多线程应用程序中死锁的方法。
引言
在本文中,我们介绍一种解决线程死锁问题的方法。 我们已将此方法应用于一个真实的多线程项目。 本文给出的是以临界区作为同步对象时的死锁解决方案,但其中的思路可以很容易地推广到其他同步对象。 该方法的主要思想是开发我们自己的临界区类,该类派生自 MFC CCriticalSection
类,并在该类中实现一个静态容器,用于保存每个创建的实例对象的状态信息。 在重写的函数 Lock()
和 Unlock()
中,访问静态容器并更新与当前对象相关的状态信息。 静态容器中的信息可以由一个特殊的线程保存到文件中。 这个特殊的线程会等待一个系统事件;事件触发后,它保存状态信息并解锁容器中的所有临界区对象。 该系统事件由一个独立的小型控制台应用程序从外部触发。
实现
为了让工作线程的使用更简单、更面向对象,我们开发了自己的工作线程类,并在该类中封装了若干与线程相关的 Windows API 函数
class CWThread { public: //Constructor CWThread() : m_hThread(NULL), m_bSuspended(TRUE) {} //Destructor virtual ~CWThread() { Stop(); } //Create BOOL Create(LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpParameter=NULL); //Resume the thread BOOL Resume(); //Start the thread BOOL Start() { //Technically is the same as Resume() return Resume(); } //Suspend the thread BOOL Suspend(); //Get thread priority int GetPriority() { return GetThreadPriority(m_hThread); } //Set thread priority BOOL SetPriority(int iPriority) { return (TRUE == SetThreadPriority(m_hThread, iPriority)); } //Stop the thread BOOL Stop(); //Check if is created BOOL IsCreated() { return (m_hThread != NULL); } //Check if is suspended BOOL IsSuspended() { return m_bSuspended; } private: CWThread& operator=(const CWThread&); //disallow copy //Handle to the thread HANDLE m_hThread; //Suspension Flag BOOL m_bSuspended; }; //Create BOOL CWThread::Create(LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpParameter) { if(NULL == m_hThread) { DWORD dwThreadID; //Always created in a Suspended State m_hThread = CreateThread((LPSECURITY_ATTRIBUTES)NULL, (DWORD)0, lpStartAddress, lpParameter, (DWORD)CREATE_SUSPENDED, (LPDWORD)&dwThreadID); if(m_hThread != NULL) { //Initialized to Normal Priority SetThreadPriority(m_hThread, THREAD_PRIORITY_NORMAL); return FALSE; //OK } } return TRUE; //ERROR } //Resume the thread BOOL CWThread::Resume() { if((m_hThread!=NULL)&&(TRUE==m_bSuspended)) { if(ResumeThread(m_hThread)!=0xFFFFFFFF) { m_bSuspended = FALSE; return FALSE; //OK } } return TRUE; //ERROR } //Suspend the thread BOOL CWThread::Suspend() { if((m_hThread!=NULL)&&(FALSE==m_bSuspended)) { //Set the Flag before suspending (otherwise is not working) m_bSuspended = TRUE; if(SuspendThread(m_hThread)!=0xFFFFFFFF) return FALSE; //OK //If not successfull Reset the flag m_bSuspended = FALSE; } return TRUE; //ERROR } //Stop the thread BOOL CWThread::Stop() { if(m_hThread != NULL) { TerminateThread(m_hThread, 1); //Closing the 
Thread Handle CloseHandle(m_hThread); m_hThread = NULL; m_bSuspended = TRUE; return FALSE; //OK } return TRUE; //ERROR }
在我们的临界区类中,我们使用 STL map 作为容器,把每个实例的唯一 ID 映射到一个数据结构。 该数据结构中保存了若干信息字符串以及指向对象实例的指针。 预处理器标志 __INFOCRITSECT_DEBUG__
用于激活 Lock()
和 Unlock()
函数中的状态更新操作。
我们的临界区类
#define __INFOCRITSECT_DEBUG__ #pragma warning(disable:4786) #include "Afxmt.h" #include "WThread.h" #include <string> #include <strstream> #include <fstream> #include <map> using namespace std; struct SData { //CONSTRUCTOR SData(string const& rostrBeforeLock="No Init", string const& rostrAfterLock="No Init", string const& rostrBeforeUnlock="No Init", string const& rostrAfterUnlock="No Init", string const& rostrDesc="No Desc") : m_ostrBeforeLock(rostrBeforeLock), m_ostrAfterLock(rostrAfterLock), m_ostrBeforeUnlock(rostrBeforeUnlock), m_ostrAfterUnlock(rostrAfterUnlock), m_ostrDesc(rostrDesc), m_poCriticalSection(NULL) {} string m_ostrBeforeLock; string m_ostrAfterLock; string m_ostrBeforeUnlock; string m_ostrAfterUnlock; string m_ostrDesc; CCriticalSection* m_poCriticalSection; }; //CInfoCritSect class - Implementing the Singleton Design Pattern class CInfoCritSect : public CCriticalSection { public: //CONSTRUCTOR CInfoCritSect(string const& rostrDesc="No Desc") { //Add a new data element in the map sm_oCritSect.Lock(); m_iIndex = sm_iCount; sm_iCount++; sm_oMap[m_iIndex] = SData(); sm_oMap[m_iIndex].m_ostrDesc = rostrDesc; sm_oMap[m_iIndex].m_poCriticalSection = this; sm_oCritSect.Unlock(); } void SetDescription(string const& rostrDesc) { //Remove the data element from the map map<unsigned int, SData>::iterator it; sm_oCritSect.Lock(); it = sm_oMap.find(m_iIndex); if(it != sm_oMap.end()) it->second.m_ostrDesc = rostrDesc; sm_oCritSect.Unlock(); } //DESTRUCTOR ~CInfoCritSect() { //Remove the data element from the map map<unsigned int, SData>::iterator it; sm_oCritSect.Lock(); it = sm_oMap.find(m_iIndex); if(it != sm_oMap.end()) sm_oMap.erase(it); sm_oCritSect.Unlock(); } void Lock(unsigned int uiLine=0, string const& rostrFileName=""); void Unlock(unsigned int uiLine=0, string const& rostrFileName=""); void BeforeLock(unsigned int uiLine, string const& rostrFileName) { //Find the position in map map<unsigned int, SData>::iterator it; sm_oCritSect.Lock(); it = 
sm_oMap.find(m_iIndex); if(it != sm_oMap.end()) { DWORD dwId = ::GetCurrentThreadId(); ostrstream ostr; ostr << "Trying to Lock: ID=" << ::GetCurrentThreadId() << ", File=" << rostrFileName << ", Line=" << uiLine << ", Time=" << ::GetTickCount(); ostr << ends; it->second.m_ostrBeforeLock = ostr.str(); ostr.freeze(0); } sm_oCritSect.Unlock(); } void AfterLock(unsigned int uiLine, string const& rostrFileName) { //Find the position in map map<unsigned int, SData>::iterator it; sm_oCritSect.Lock(); it = sm_oMap.find(m_iIndex); if(it != sm_oMap.end()) { DWORD dwId = ::GetCurrentThreadId(); ostrstream ostr; ostr << "Locked: ID=" << ::GetCurrentThreadId() << ", File=" << rostrFileName << ", Line=" << uiLine << ", Time=" << ::GetTickCount(); ostr << ends; it->second.m_ostrAfterLock = ostr.str(); ostr.freeze(0); } sm_oCritSect.Unlock(); } void BeforeUnlock(unsigned int uiLine, string const& rostrFileName) { //Find the position in map map<unsigned int, SData>::iterator it; sm_oCritSect.Lock(); it = sm_oMap.find(m_iIndex); if(it != sm_oMap.end()) { DWORD dwId = ::GetCurrentThreadId(); ostrstream ostr; ostr << "Trying to Unlock: ID=" << ::GetCurrentThreadId() << ", File=" << rostrFileName << ", Line=" << uiLine << ", Time=" << ::GetTickCount(); ostr << ends; it->second.m_ostrBeforeUnlock = ostr.str(); ostr.freeze(0); } sm_oCritSect.Unlock(); } void AfterUnlock(unsigned int uiLine, string const& rostrFileName) { //Find the position in map map<unsigned int, SData>::iterator it; sm_oCritSect.Lock(); it = sm_oMap.find(m_iIndex); if(it != sm_oMap.end()) { DWORD dwId = ::GetCurrentThreadId(); ostrstream ostr; ostr << "Unlocked: ID=" << ::GetCurrentThreadId() << ", File=" << rostrFileName << ", Line=" << uiLine << ", Time=" << ::GetTickCount(); ostr << ends; it->second.m_ostrAfterUnlock = ostr.str(); ostr.freeze(0); } sm_oCritSect.Unlock(); } static void PrintInfo() { //Open Output File ofstream fout("Deadlocks.out"); if(!fout) return; sm_oCritSect.Lock(); map<unsigned int, 
SData>::iterator it = sm_oMap.begin(); while(it != sm_oMap.end()) { fout << "Critical Section: " << it->second.m_ostrDesc << endl; fout << " " << it->second.m_ostrBeforeLock << endl; fout << " " << it->second.m_ostrAfterLock << endl; fout << " " << it->second.m_ostrBeforeUnlock << endl; fout << " " << it->second.m_ostrAfterUnlock << endl; fout << endl; it++; } sm_oCritSect.Unlock(); fout.close(); } static void UnlockAll() { sm_oCritSect.Lock(); map<unsigned int, SData>::iterator it = sm_oMap.begin(); while(it != sm_oMap.end()) { it->second.m_poCriticalSection->Unlock(); it++; } sm_oCritSect.Unlock(); } private: static CCriticalSection sm_oCritSect; static map<unsigned int, SData> sm_oMap; static unsigned int sm_iCount; unsigned int m_iIndex; }; inline void CInfoCritSect::Lock(unsigned int uiLine, string const& rostrFileName) { #ifdef __INFOCRITSECT_DEBUG__ BeforeLock(uiLine, rostrFileName); #endif CCriticalSection::Lock(); #ifdef __INFOCRITSECT_DEBUG__ AfterLock(uiLine, rostrFileName); #endif } inline void CInfoCritSect::Unlock(unsigned int uiLine, string const& rostrFileName) { #ifdef __INFOCRITSECT_DEBUG__ BeforeUnlock(uiLine, rostrFileName); #endif CCriticalSection::Unlock(); #ifdef __INFOCRITSECT_DEBUG__ AfterUnlock(uiLine, rostrFileName); #endif } unsigned int CInfoCritSect::sm_iCount = 0; map<unsigned int, SData> CInfoCritSect::sm_oMap; CCriticalSection CInfoCritSect::sm_oCritSect;
特殊的解锁线程派生自我们上面介绍的通用工作线程类,并实现了 Singleton 设计模式
class CDeadlocksThread : public CWThread { private: //CONSTRUCTOR - Private to prevent Construction from outside CDeadlocksThread(); public: //Create bool Create(); //DESTRUCTOR ~CDeadlocksThread(); //Getting the address of the unique instance static CDeadlocksThread* GetInstance(); private: }; #include "stdafx.h" #include "DeadlocksThread.h" #include "InfoCritSect.h" //Thread Function UINT DeadlocksThreadProc(LPVOID pParam); //CONSTRUCTOR CDeadlocksThread::CDeadlocksThread() { } //DESTRUCTOR CDeadlocksThread::~CDeadlocksThread() { } //Getting the address of the unique instance CDeadlocksThread* CDeadlocksThread::GetInstance() { static CDeadlocksThread soDeadlocksThread; return &soDeadlocksThread; } //Create bool CDeadlocksThread::Create() { if(TRUE == CWThread::Create((LPTHREAD_START_ROUTINE)DeadlocksThreadProc)) return true; else return false; } //The Thread Function UINT DeadlocksThreadProc(LPVOID pParam) { HANDLE hEv = ::CreateEvent(NULL, FALSE, FALSE, "DeadlockDetection"); while(true) { ::WaitForSingleObject(hEv, INFINITE); CInfoCritSect::PrintInfo(); CInfoCritSect::UnlockAll(); } return 0; }
在 DeadlocksThreadProc()
中,当系统事件被设置时,会调用静态函数 PrintInfo()
和 UnlockAll()
,分别用于将状态信息保存在文件中,并解锁所有实例对象。 用于触发事件的控制台应用程序非常简单
#include "windows.h"
#include <iostream>

using namespace std;

//Console helper: opens the named event "DeadlockDetection" and signals it.
//Prints the outcome to standard output.
//Returns 0 if the event was signalled, 1 if it could not be opened or set.
int main(int argc, char* argv[])
{
    HANDLE hEv = ::OpenEvent(EVENT_ALL_ACCESS, FALSE, "DeadlockDetection");
    if(NULL == hEv)
    {
        //Nothing was opened, so there is no handle to close
        cout << "Cannot open DeadlockDetection event" << endl;
        return 1;
    }

    int iExitCode = 0;
    if(::SetEvent(hEv))
    {
        cout << "DeadlockDetection event set" << endl;
    }
    else
    {
        cout << "Cannot set DeadlockDetection event" << endl;
        iExitCode = 1;
    }

    ::CloseHandle(hEv);
    return iExitCode;
}
控制台应用程序与解锁线程通过相同的名称(DeadlockDetection)引用同一个系统事件。
与本文相关的示例项目 TestDeadlocks.zip 是一个基于对话框的简单 MFC 应用程序。 当按下“启动线程”按钮时,会启动一个单独的线程;在该线程中,我们故意在锁定数据存储库的临界区之后不再解锁。 由于数据存储库也由用户界面线程在 OnPaint()
处理程序中访问,因此效果是应用程序的用户界面将被冻结。 该线程也派生自我们的通用工作线程类
class CThread1 : public CWThread { public: //CONSTRUCTOR CThread1(); //Create bool Create(); //DESTRUCTOR ~CThread1(); private: }; //Thread Function UINT Thread1Proc(LPVOID pParam); //CONSTRUCTOR CThread1::CThread1() { } //DESTRUCTOR CThread1::~CThread1() { } //Create bool CThread1::Create() { if(TRUE == CWThread::Create((LPTHREAD_START_ROUTINE)Thread1Proc)) return true; else return false; } //The Thread Function UINT Thread1Proc(LPVOID pParam) { CDataReservoir* poDataReservoir = CDataReservoir::GetInstance(); poDataReservoir->Lock(__LINE__, __FILE__); //Forgets to Unlock() //poDataReservoir->Unlock(__LINE__, __FILE__); TRACE("\nThread1"); return 0; } //Or in an Infinite Loop /* //The Thread Function UINT Thread1Proc(LPVOID pParam) { //Infinite loop while(TRUE) { CDataReservoir* poDataReservoir = CDataReservoir::GetInstance(); poDataReservoir->Lock(__LINE__, __FILE__); //Forgets to Unlock() //poDataReservoir->Unlock(__LINE__, __FILE__); TRACE("\nThread1"); Sleep(50); } return 0; } */
我们的数据存储库类不包含任何真实数据,仅用于演示目的。 可以在实际应用程序中轻松添加特定数据。 对此真实数据的访问将受到 m_oInfoCritSect
临界区成员的保护。
//CDataReservoir class - Implementing the Singleton Design Pattern class CDataReservoir { private: //CONSTRUCTOR - Private to prevent Construction from outside CDataReservoir(); public: //DESTRUCTOR ~CDataReservoir(); //Getting the address of the unique instance static CDataReservoir* GetInstance(); void Lock(unsigned int uiLine=0, string const& rostrFileName=""); void Unlock(unsigned int uiLine=0, string const& rostrFileName=""); private: CInfoCritSect m_oInfoCritSect; }; inline void CDataReservoir::Lock(unsigned int uiLine, string const& rostrFileName) { m_oInfoCritSect.Lock(uiLine, rostrFileName); } inline void CDataReservoir::Unlock(unsigned int uiLine, string const& rostrFileName) { m_oInfoCritSect.Unlock(uiLine, rostrFileName); } //CONSTRUCTOR CDataReservoir::CDataReservoir() { m_oInfoCritSect.SetDescription("DataReservoir CS"); } //DESTRUCTOR CDataReservoir::~CDataReservoir() { } //Getting the address of the unique instance CDataReservoir* CDataReservoir::GetInstance() { static CDataReservoir soDataReservoir; return &soDataReservoir; }
解锁线程在 OnInitDialog()
处理程序中创建并启动
CDeadlocksThread::GetInstance()->Create(); CDeadlocksThread::GetInstance()->Start();
冻结线程通过按下“启动线程”按钮来启动
//Button handler: (re)starts the demo thread held in m_oThread1
//(presumably a CThread1 - its declaration is not shown here).
//The three calls are order-dependent: stop, then create, then start.
void CTestDlg::OnButton1()
{
//Stop any thread left over from a previous press
m_oThread1.Stop();
//Create a new thread
m_oThread1.Create();
//Let it run
m_oThread1.Start();
}
我们可以启动应用程序并按下“启动线程”按钮。 之后,我们将看到用户界面不再更新。 要解锁应用程序,应运行 FireEvent.exe 控制台应用程序。 之后,用户界面将恢复更新,并且文件 Deadlocks.out 将被写入磁盘。 此文件中的信息类似于
Critical Section: DataReservoir CS
Trying to Lock: ID=1104,
File=C:\CodeProject\Deadlocks\TestDeadlocks\TestDlg.cpp, Line=168, Time=9481373
Locked: ID=1100,
File=C:\CodeProject\Deadlocks\TestDeadlocks\Thread1.cpp, Line=34, Time=9479530
Trying to Unlock: ID=1104,
File=C:\CodeProject\Deadlocks\TestDeadlocks\TestDlg.cpp, Line=169, Time=9477357
Unlocked: ID=1104,
File=C:\CodeProject\Deadlocks\TestDeadlocks\TestDlg.cpp, Line=169, Time=9477357
这些信息很容易解读。 可以看到,ID=1104 的用户界面线程正在尝试锁定临界区,但无法成功,因为该临界区已被 ID=1100 的工作线程锁定,且此后一直没有解锁。 由于 ID=1100 的线程没有释放资源,因此发生了死锁。
我们希望您会发现本文中提供的信息有用!