65.9K
CodeProject 正在变化。 阅读更多。
Home

检测和解决多线程应用程序中死锁的方法

starIconstarIconstarIconstarIcon
emptyStarIcon
starIcon

4.63/5 (5 票)

2001年5月27日

3分钟阅读

viewsIcon

91404

downloadIcon

1188

介绍了一种使用临界区作为同步对象来检测和解决多线程应用程序中死锁的方法。

引言

在本文中,我们介绍了一种解决线程死锁问题的方法。 我们已将此方法应用于一个真实的多线程项目。 本文给出的是使用临界区作为同步对象时的死锁解决方案,但其思想可以很容易地扩展到其他同步对象。 该方法的主要思想是开发我们自己的临界区类,该类派生自 MFC 的 CCriticalSection 类,并在其中实现一个静态容器,用于保存每个已创建实例对象的状态信息。 在重写的函数 Lock() 和 Unlock() 中,会访问静态容器并更新与当前对象相关的状态信息。 静态容器中的信息可以由一个特殊的线程保存到文件中。 这个特殊的线程会等待一个系统事件;事件被触发后,它保存状态信息并解锁容器中的所有临界区对象。 该系统事件由一个独立的小型控制台应用程序从外部触发。

实现

为了使工作线程的使用更容易、更面向对象,我们开发了自己的工作线程类,并在该类中封装了一些与线程相关的 Windows API 函数:

//CWThread - thin wrapper around one Win32 thread handle.
//
//Return convention: Create(), Resume(), Start(), Suspend() and Stop() return
//FALSE on success and TRUE on error (see their definitions). SetPriority()
//is the exception and returns TRUE on success.
//
//The object owns the handle: the destructor calls Stop(), which terminates
//the thread and closes the handle. Copying is therefore disallowed.
class CWThread
{
public:
  //Constructor: no thread attached yet. The suspension flag starts as TRUE
  //because Create() always creates the thread in a suspended state.
  CWThread() : m_hThread(NULL), m_bSuspended(TRUE) {}

  //Destructor: stops the thread if one is still attached
  virtual ~CWThread()
  {
    Stop();
  }

  //Create the thread (suspended) running lpStartAddress(lpParameter)
  BOOL Create(LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpParameter=NULL);

  //Resume the thread
  BOOL Resume();

  //Start the thread
  BOOL Start()
  {
    //Technically is the same as Resume()
    return Resume();
  }

  //Suspend the thread
  BOOL Suspend();

  //Get thread priority (forwards the result of GetThreadPriority unchanged)
  int GetPriority() const
  {
    return GetThreadPriority(m_hThread);
  }

  //Set thread priority; returns TRUE on success.
  //Win32 reports success as "nonzero", so compare against FALSE rather
  //than against the specific value TRUE.
  BOOL SetPriority(int iPriority)
  {
    return (SetThreadPriority(m_hThread, iPriority) != FALSE);
  }

  //Stop the thread
  BOOL Stop();

  //Check if is created (a thread handle is currently held)
  BOOL IsCreated() const
  {
    return (m_hThread != NULL);
  }

  //Check if is suspended (value of the flag maintained by Resume/Suspend/Stop)
  BOOL IsSuspended() const
  {
    return m_bSuspended;
  }

private:
  //Disallow copy: two objects holding the same handle would both
  //terminate the thread and close the handle. Declared, never defined.
  CWThread(const CWThread&);
  CWThread& operator=(const CWThread&);
  //Handle to the thread
  HANDLE m_hThread;
  //Suspension Flag
  BOOL m_bSuspended;
};

//Creates the Win32 thread in a suspended state and gives it normal priority.
//Returns FALSE on success; TRUE when a thread handle is already held or
//CreateThread fails.
BOOL CWThread::Create(LPTHREAD_START_ROUTINE lpStartAddress, LPVOID lpParameter)
{
  //A handle is already held: refuse to create a second thread
  if(m_hThread != NULL)
    return TRUE; //ERROR

  //The thread id is received but not kept
  DWORD dwIgnoredThreadId;
  m_hThread = CreateThread(NULL, 0, lpStartAddress, lpParameter,
    CREATE_SUSPENDED, &dwIgnoredThreadId);
  if(NULL == m_hThread)
    return TRUE; //ERROR

  SetThreadPriority(m_hThread, THREAD_PRIORITY_NORMAL);
  return FALSE; //OK
}

//Resumes a thread that is currently flagged as suspended.
//Returns FALSE on success; TRUE when there is no thread, the thread is not
//flagged as suspended, or ResumeThread reports failure (0xFFFFFFFF).
BOOL CWThread::Resume()
{
  if((NULL == m_hThread) || (m_bSuspended != TRUE))
    return TRUE; //ERROR

  if(0xFFFFFFFF == ResumeThread(m_hThread))
    return TRUE; //ERROR

  m_bSuspended = FALSE;
  return FALSE; //OK
}

//Suspends a thread that is currently flagged as running.
//Returns FALSE on success; TRUE when there is no thread, the thread is
//already flagged as suspended, or SuspendThread reports failure (0xFFFFFFFF).
BOOL CWThread::Suspend()
{
  if((NULL == m_hThread) || (m_bSuspended != FALSE))
    return TRUE; //ERROR

  //The flag is raised BEFORE the call; the original author notes that it
  //does not work otherwise (presumably for a thread suspending itself,
  //which cannot update the flag afterwards - confirm).
  m_bSuspended = TRUE;
  if(0xFFFFFFFF == SuspendThread(m_hThread))
  {
    //The call failed, so take the flag back
    m_bSuspended = FALSE;
    return TRUE; //ERROR
  }
  return FALSE; //OK
}

//Terminates the thread with exit code 1, closes its handle and returns the
//object to its initial state (no handle, suspension flag TRUE).
//Returns FALSE on success; TRUE when no thread handle is held.
BOOL CWThread::Stop()
{
  if(NULL == m_hThread)
    return TRUE; //ERROR

  //Results of TerminateThread/CloseHandle are not checked
  TerminateThread(m_hThread, 1);
  CloseHandle(m_hThread);

  m_hThread = NULL;
  m_bSuspended = TRUE;
  return FALSE; //OK
}

在我们的临界区类中,我们使用 STL map 作为容器,将唯一的实例 ID 映射到一个数据结构。 该数据结构中保存了若干信息字符串以及指向对象实例的指针。 预处理器标志 __INFOCRITSECT_DEBUG__ 用于激活 Lock() 和 Unlock() 函数中的状态更新操作。

我们的临界区类

#define __INFOCRITSECT_DEBUG__

#pragma warning(disable:4786)

#include "Afxmt.h"
#include "WThread.h"
#include <string>
#include <strstream>
#include <fstream>
#include <map>

using namespace std;

//Diagnostic record for one critical section: one text slot per Lock()/Unlock()
//phase, a description, and a pointer back to the critical section itself.
struct SData
{
  //Most recent message for each phase
  string m_ostrBeforeLock;
  string m_ostrAfterLock;
  string m_ostrBeforeUnlock;
  string m_ostrAfterUnlock;
  //Human-readable name of the critical section
  string m_ostrDesc;
  //The critical section this record describes (NULL until assigned)
  CCriticalSection* m_poCriticalSection;

  //CONSTRUCTOR
  //Every text argument has a placeholder default; the pointer starts as NULL.
  SData(string const& rostrBeforeLock="No Init", string const& rostrAfterLock="No Init",
    string const& rostrBeforeUnlock="No Init", string const& rostrAfterUnlock="No Init",
    string const& rostrDesc="No Desc")
    : m_ostrBeforeLock(rostrBeforeLock),
      m_ostrAfterLock(rostrAfterLock),
      m_ostrBeforeUnlock(rostrBeforeUnlock),
      m_ostrAfterUnlock(rostrAfterUnlock),
      m_ostrDesc(rostrDesc),
      m_poCriticalSection(NULL)
  {
  }
};

//CInfoCritSect class - Implementing the Singleton Design Pattern
class CInfoCritSect : public CCriticalSection
{
public:
  //CONSTRUCTOR
  CInfoCritSect(string const& rostrDesc="No Desc")
  {
    //Add a new data element in the map
    sm_oCritSect.Lock();
      m_iIndex = sm_iCount;
      sm_iCount++;
      sm_oMap[m_iIndex] = SData();
      sm_oMap[m_iIndex].m_ostrDesc = rostrDesc;
      sm_oMap[m_iIndex].m_poCriticalSection = this;
    sm_oCritSect.Unlock();
  }

  void SetDescription(string const& rostrDesc)
  {
    //Remove the data element from the map
    map<unsigned int, SData>::iterator it;
    sm_oCritSect.Lock();
      it = sm_oMap.find(m_iIndex);
      if(it != sm_oMap.end())
        it->second.m_ostrDesc = rostrDesc;
    sm_oCritSect.Unlock();
  }

  //DESTRUCTOR
  ~CInfoCritSect()
  {
    //Remove the data element from the map
    map<unsigned int, SData>::iterator it;
    sm_oCritSect.Lock();
      it = sm_oMap.find(m_iIndex);
      if(it != sm_oMap.end())
        sm_oMap.erase(it);
    sm_oCritSect.Unlock();
  }
	
  void Lock(unsigned int uiLine=0, string const& rostrFileName="");

  void Unlock(unsigned int uiLine=0, string const& rostrFileName="");

  void BeforeLock(unsigned int uiLine, string const& rostrFileName)
  {
    //Find the position in map
    map<unsigned int, SData>::iterator it;
    sm_oCritSect.Lock();
    it = sm_oMap.find(m_iIndex);
    if(it != sm_oMap.end())
    {
      DWORD dwId = ::GetCurrentThreadId();
      ostrstream ostr;
      ostr << "Trying to Lock: ID=" << ::GetCurrentThreadId()
        << ", File=" << rostrFileName << ", Line=" << uiLine 
        << ", Time=" << ::GetTickCount();
      ostr << ends;
      it->second.m_ostrBeforeLock = ostr.str();
      ostr.freeze(0);
    }
    sm_oCritSect.Unlock();
  }

  void AfterLock(unsigned int uiLine, string const& rostrFileName)
  {
    //Find the position in map
    map<unsigned int, SData>::iterator it;
    sm_oCritSect.Lock();
    it = sm_oMap.find(m_iIndex);
    if(it != sm_oMap.end())
    {
      DWORD dwId = ::GetCurrentThreadId();
      ostrstream ostr;
      ostr << "Locked: ID=" << ::GetCurrentThreadId()
        << ", File=" << rostrFileName << ", Line=" << uiLine 
        << ", Time=" << ::GetTickCount();
      ostr << ends;
      it->second.m_ostrAfterLock = ostr.str();
      ostr.freeze(0);
    }
    sm_oCritSect.Unlock();
  }

  void BeforeUnlock(unsigned int uiLine, string const& rostrFileName)
  {
    //Find the position in map
    map<unsigned int, SData>::iterator it;
    sm_oCritSect.Lock();
    it = sm_oMap.find(m_iIndex);
    if(it != sm_oMap.end())
    {
      DWORD dwId = ::GetCurrentThreadId();
      ostrstream ostr;
      ostr << "Trying to Unlock: ID=" << ::GetCurrentThreadId()
        << ", File=" << rostrFileName << ", Line=" << uiLine 
        << ", Time=" << ::GetTickCount();
      ostr << ends;
      it->second.m_ostrBeforeUnlock = ostr.str();
      ostr.freeze(0);
    }
    sm_oCritSect.Unlock();
  }

  void AfterUnlock(unsigned int uiLine, string const& rostrFileName)
  {
    //Find the position in map
    map<unsigned int, SData>::iterator it;
    sm_oCritSect.Lock();
      it = sm_oMap.find(m_iIndex);
      if(it != sm_oMap.end())
      {
        DWORD dwId = ::GetCurrentThreadId();
        ostrstream ostr;
        ostr << "Unlocked: ID=" << ::GetCurrentThreadId()
          << ", File=" << rostrFileName << ", Line=" << uiLine 
          << ", Time=" << ::GetTickCount();
        ostr << ends;
        it->second.m_ostrAfterUnlock = ostr.str();
        ostr.freeze(0);
      }
    sm_oCritSect.Unlock();
  }

  static void PrintInfo()
  {
    //Open Output File
    ofstream fout("Deadlocks.out");
    if(!fout)
      return;
    sm_oCritSect.Lock();
      map<unsigned int, SData>::iterator it = sm_oMap.begin();
      while(it != sm_oMap.end())
      {			
        fout << "Critical Section: " << it->second.m_ostrDesc << endl;
        fout << " " << it->second.m_ostrBeforeLock << endl;
        fout << " " << it->second.m_ostrAfterLock << endl;
        fout << " " << it->second.m_ostrBeforeUnlock << endl;
        fout << " " << it->second.m_ostrAfterUnlock << endl;
        fout << endl;
        it++;
      }
    sm_oCritSect.Unlock();
    fout.close();
  }

  static void UnlockAll()
  {
    sm_oCritSect.Lock();
      map<unsigned int, SData>::iterator it = sm_oMap.begin();
      while(it != sm_oMap.end())
      {
        it->second.m_poCriticalSection->Unlock();
        it++;
      }
    sm_oCritSect.Unlock();
  }

private:
  static CCriticalSection sm_oCritSect;
  static map<unsigned int, SData> sm_oMap;
  static unsigned int sm_iCount;
  unsigned int m_iIndex;
};

//Acquires the underlying critical section. With __INFOCRITSECT_DEBUG__
//defined, the attempt is recorded before the call and the success after it.
inline void CInfoCritSect::Lock(unsigned int uiLine, string const& rostrFileName)
{
#if defined(__INFOCRITSECT_DEBUG__)
  BeforeLock(uiLine, rostrFileName);
#endif

  CCriticalSection::Lock();

#if defined(__INFOCRITSECT_DEBUG__)
  AfterLock(uiLine, rostrFileName);
#endif
}

//Releases the underlying critical section. With __INFOCRITSECT_DEBUG__
//defined, the attempt is recorded before the call and the completion after it.
inline void CInfoCritSect::Unlock(unsigned int uiLine, string const& rostrFileName)
{
#if defined(__INFOCRITSECT_DEBUG__)
  BeforeUnlock(uiLine, rostrFileName);
#endif

  CCriticalSection::Unlock();

#if defined(__INFOCRITSECT_DEBUG__)
  AfterUnlock(uiLine, rostrFileName);
#endif
}

//Next instance index handed out by the CInfoCritSect constructor
//NOTE(review): these are non-inline definitions; if this text lives in a
//header included by several translation units they would be defined more
//than once - confirm where they are compiled.
unsigned int CInfoCritSect::sm_iCount = 0;

//Instance index -> diagnostic record, shared by all CInfoCritSect objects
map<unsigned int, SData> CInfoCritSect::sm_oMap;

//Critical section locked around every access to sm_oMap and sm_iCount
CCriticalSection CInfoCritSect::sm_oCritSect;

特殊的解锁线程派生自我们上面介绍的通用工作线程类,并实现了 Singleton 设计模式

//Worker thread that runs DeadlocksThreadProc. Only one instance exists:
//the constructor is private and GetInstance() hands out its address.
class CDeadlocksThread : public CWThread
{
public:
  //Address of the unique instance
  static CDeadlocksThread* GetInstance();
  //Creates the underlying thread (hides CWThread::Create)
  bool Create();
  //DESTRUCTOR
  ~CDeadlocksThread();

private:
  //CONSTRUCTOR - not reachable from outside the class
  CDeadlocksThread();
};

#include "stdafx.h"
#include "DeadlocksThread.h"
#include "InfoCritSect.h"

//Thread Function (forward declaration; defined at the end of this file and
//handed to CWThread::Create by CDeadlocksThread::Create)
UINT DeadlocksThreadProc(LPVOID pParam);

//CONSTRUCTOR - nothing to initialise beyond the CWThread base
CDeadlocksThread::CDeadlocksThread()
{
}

//DESTRUCTOR - empty; the CWThread base destructor calls Stop()
CDeadlocksThread::~CDeadlocksThread()
{
}

//Returns the address of the one CDeadlocksThread object, which is a
//function-local static constructed on the first call.
CDeadlocksThread* CDeadlocksThread::GetInstance()
{
  static CDeadlocksThread s_oTheInstance;
  CDeadlocksThread* const poInstance = &s_oTheInstance;
  return poInstance;
}

//Create
bool CDeadlocksThread::Create()
{
  if(TRUE == CWThread::Create((LPTHREAD_START_ROUTINE)DeadlocksThreadProc))
    return true;
  else
    return false;
}

//The Thread Function
//Creates (or opens) the auto-reset event named "DeadlockDetection" and then,
//each time the event is signalled, dumps the critical-section records to
//file and unlocks every registered critical section.
//
//Returns 1 if the event cannot be created or a wait fails. Without these
//checks a NULL handle makes WaitForSingleObject fail immediately, and the
//loop would spin forever calling PrintInfo()/UnlockAll().
//
//pParam is unused.
//NOTE(review): this function is passed to CreateThread through a cast to
//LPTHREAD_START_ROUTINE, whose documented shape is DWORD WINAPI (LPVOID) -
//confirm the calling convention matches on the target platform.
UINT DeadlocksThreadProc(LPVOID pParam)
{
  HANDLE hEv = ::CreateEvent(NULL, FALSE, FALSE, "DeadlockDetection");
  if(NULL == hEv)
    return 1;

  //Keep serving requests for as long as waits succeed
  while(WAIT_OBJECT_0 == ::WaitForSingleObject(hEv, INFINITE))
  {
    CInfoCritSect::PrintInfo();
    CInfoCritSect::UnlockAll();
  }

  //Only reached when a wait failed
  ::CloseHandle(hEv);
  return 1;
}

在 DeadlocksThreadProc() 中,当系统事件被设置时,会依次调用静态函数 PrintInfo() 和 UnlockAll(),分别用于将状态信息保存到文件中以及解锁所有实例对象。 用于触发事件的控制台应用程序非常简单:

#include "windows.h"
#include <iostream>

using namespace std;

//Signals the named event "DeadlockDetection" so that the process waiting on
//it dumps its lock records and unlocks its critical sections.
//Exit status: 0 when the event was set, 1 when it could not be opened or set.
//argc/argv are unused.
int main(int argc, char* argv[])
{
  HANDLE hEv = ::OpenEvent(EVENT_ALL_ACCESS, FALSE, "DeadlockDetection");
  if(NULL == hEv)
  {
    //Nothing was opened, so there is no handle to close
    cout << "Cannot open DeadlockDetection event" << endl;
    return 1;
  }

  int iExitCode = 0;
  if(::SetEvent(hEv))
    cout << "DeadlockDetection event set" << endl;
  else
  {
    cout << "Cannot set DeadlockDetection event" << endl;
    iExitCode = 1;
  }

  ::CloseHandle(hEv);
  return iExitCode;
}

控制台应用程序与解锁线程通过相同的名称("DeadlockDetection")引用同一个系统事件。

与本文相关的示例项目 TestDeadlocks.zip 是一个基于对话框的简单 MFC 应用程序。 当按下“启动线程”按钮时,会启动一个单独的线程;在该线程中,我们故意在锁定数据存储库的临界区之后忘记解锁。 由于用户界面线程也会在 OnPaint() 处理程序中访问数据存储库,其结果是应用程序的用户界面被冻结。 该线程同样派生自我们的通用工作线程类:

//Demo worker thread that runs Thread1Proc.
class CThread1 : public CWThread
{
public:
  //CONSTRUCTOR
  CThread1();
  //DESTRUCTOR
  ~CThread1();
  //Creates the underlying thread (hides CWThread::Create)
  bool Create();
};

//Thread Function (forward declaration; handed to CWThread::Create by
//CThread1::Create)
UINT Thread1Proc(LPVOID pParam);

//CONSTRUCTOR - nothing to initialise beyond the CWThread base
CThread1::CThread1()
{
}

//DESTRUCTOR - empty; the CWThread base destructor calls Stop()
CThread1::~CThread1()
{
}

//Create
bool CThread1::Create()
{
  if(TRUE == CWThread::Create((LPTHREAD_START_ROUTINE)Thread1Proc))
    return true;
  else
    return false;
}

//The Thread Function
//Demonstration of the fault being diagnosed: locks the data reservoir and
//returns without ever unlocking it. pParam is unused.
UINT Thread1Proc(LPVOID pParam)
{
  CDataReservoir* const poReservoir = CDataReservoir::GetInstance();
  poReservoir->Lock(__LINE__, __FILE__);
  //The matching Unlock() is left out on purpose:
  //poReservoir->Unlock(__LINE__, __FILE__);
  TRACE("\nThread1");
  return 0;
}

//Or in an Infinite Loop
/*
//The Thread Function
UINT Thread1Proc(LPVOID pParam)
{
  //Infinite loop
  while(TRUE)
  {
    CDataReservoir* poDataReservoir = CDataReservoir::GetInstance();
    poDataReservoir->Lock(__LINE__, __FILE__);
    //Forgets to Unlock()
    //poDataReservoir->Unlock(__LINE__, __FILE__);
    TRACE("\nThread1");
    Sleep(50);		
  }
  return 0;
}
*/

我们的数据存储库类不包含任何真实数据,仅用于演示目的。 在实际应用程序中可以很容易地添加具体的数据。 对这些真实数据的访问将由 m_oInfoCritSect 临界区成员保护。

//CDataReservoir class - single shared object (private constructor plus
//GetInstance()) whose Lock()/Unlock() forward to an instrumented
//critical section member.
class CDataReservoir
{
public:
  //Address of the unique instance
  static CDataReservoir* GetInstance();
  //DESTRUCTOR
  ~CDataReservoir();
  //Lock/unlock the member critical section; line and file are passed through
  void Lock(unsigned int uiLine=0, string const& rostrFileName="");
  void Unlock(unsigned int uiLine=0, string const& rostrFileName="");

private:
  //CONSTRUCTOR - not reachable from outside the class
  CDataReservoir();

  //Instrumented critical section behind Lock()/Unlock()
  CInfoCritSect m_oInfoCritSect;
};

//Forwards to m_oInfoCritSect.Lock(), passing the caller's line and file through
inline void CDataReservoir::Lock(unsigned int uiLine, string const& rostrFileName)
{
  m_oInfoCritSect.Lock(uiLine, rostrFileName);
}

//Forwards to m_oInfoCritSect.Unlock(), passing the caller's line and file through
inline void CDataReservoir::Unlock(unsigned int uiLine, string const& rostrFileName)
{
  m_oInfoCritSect.Unlock(uiLine, rostrFileName);
}

//CONSTRUCTOR - labels the member critical section so its record can be
//recognised by description ("DataReservoir CS")
CDataReservoir::CDataReservoir()
{
  m_oInfoCritSect.SetDescription("DataReservoir CS");
}

//DESTRUCTOR - empty; the m_oInfoCritSect member is destroyed automatically
CDataReservoir::~CDataReservoir()
{
}

//Returns the address of the one CDataReservoir object, which is a
//function-local static constructed on the first call.
CDataReservoir* CDataReservoir::GetInstance()
{
  static CDataReservoir s_oTheInstance;
  CDataReservoir* const poInstance = &s_oTheInstance;
  return poInstance;
}

解锁线程在 OnInitDialog() 处理程序中创建并启动

//Create the unlock thread (CWThread::Create makes it suspended), then start
//it running. Both return values are ignored here.
CDeadlocksThread::GetInstance()->Create();
CDeadlocksThread::GetInstance()->Start();

冻结线程通过按下“启动线程”按钮来启动

//Button handler: (re)starts the demo thread.
//Stop() first terminates any thread left over from a previous press, so
//that Create() finds no existing handle; Start() then resumes the new,
//suspended thread. All three return values are ignored.
void CTestDlg::OnButton1()
{
  m_oThread1.Stop();
  m_oThread1.Create();
  m_oThread1.Start();
}

我们可以启动应用程序并按下“启动线程”按钮。 之后,我们将看到用户界面不再更新。 要解锁应用程序,我们应该运行 FireEvent.exe 控制台应用程序。 之后,用户界面将恢复更新,并且文件 Deadlocks.out 将被写入磁盘。 此文件中的信息类似于:

Critical Section: DataReservoir CS
 Trying to Lock: ID=1104, 
    File=C:\CodeProject\Deadlocks\TestDeadlocks\TestDlg.cpp, Line=168, Time=9481373
 Locked: ID=1100, 
    File=C:\CodeProject\Deadlocks\TestDeadlocks\Thread1.cpp, Line=34, Time=9479530
 Trying to Unlock: ID=1104, 
    File=C:\CodeProject\Deadlocks\TestDeadlocks\TestDlg.cpp, Line=169, Time=9477357
 Unlocked: ID=1104, 
    File=C:\CodeProject\Deadlocks\TestDeadlocks\TestDlg.cpp, Line=169, Time=9477357

这些信息很容易解释。 可以看到,ID=1104 的用户界面线程正在尝试锁定临界区,但无法成功,因为临界区已被 ID=1100 的工作线程锁定,并且此后一直没有解锁;也就是说,由于 ID=1100 的线程没有释放资源,我们遇到了死锁。

我们希望您会发现本文中提供的信息有用!

© . All rights reserved.