如何识别网页选择中的不同元素?






4.64/5(4 票)
2005年3月22日

73531

1122
如何识别网页选择中的不同元素?
引言
首先,我获取浏览器组件的 IHTMLDocument2
接口,然后获取该接口的 selection
属性以获取 IHTMLSelectionObject
接口。现在到了困难的部分,实际解析这些内容。我假设如果我能够使用 createRange
方法为所选内容创建一个控制范围,那么我就能够获得一个列表,其中 HTML 标签及其属性会整齐地分隔开。
现在,借助 IMarkupServices 接口,我可以枚举网页浏览器所选部分的全部元素。以下是代码:
//File:IESelection.CPP #include <afxwin.h> #include <afxdisp.h> #include <atlbase.h> //You may derive a class from CComModule and use it //if you want to override //something, but do not change the name of _Module extern CComModule _Module; #include <atlcom.h> /// #include <mshtml.h> #include <MsHtmcid.h> /// HRESULT GetMarkupServices(IDispatch *pDocument, IMarkupServices ** pMarkupServices ); HRESULT EnumSelectionElements(IDispatch *pDocument, CString &msg); HRESULT PrintElement(IHTMLElement *pElement, CString &msg); /// HRESULT EnumSelectionElements(IDispatch * pDocument, CString &msg) { HRESULT hr = S_OK; CComQIPtr<IHTMLDOCUMENT2> pDoc; CComQIPtr<IHTMLSELECTIONOBJECT> pSel; CComQIPtr<IHTMLTXTRANGE> pRange; CComQIPtr<IMARKUPSERVICES> pMarkupServices; CComQIPtr<IMARKUPPOINTER> pHtmlStart; CComQIPtr<IMARKUPPOINTER> pHtmlEnd; CComQIPtr<IHTMLELEMENT> pElement; CComBSTR bstrTagName; BOOL bRight = FALSE; CComBSTR bstrinnerText; /// msg = L""; /// if( ! (pDoc = pDocument) ) return E_FAIL; hr = pDoc->get_selection( & pSel ); if (hr || (!pSel) ) return E_FAIL; hr = pSel->createRange((IDispatch **)&pRange); if (hr || (!pRange)) return E_FAIL; hr = GetMarkupServices(pDocument, &pMarkupServices); if (hr || (!pMarkupServices) ) return E_FAIL; hr = pMarkupServices->CreateMarkupPointer( &pHtmlStart ); if (hr || (!pHtmlStart) ) return E_FAIL; hr = pMarkupServices->CreateMarkupPointer( &pHtmlEnd ); if (hr || (!pHtmlEnd)) return E_FAIL; hr = pMarkupServices->MovePointersToRange( pRange, pHtmlStart, pHtmlEnd ); if (hr) return E_FAIL; /// while (TRUE) { pElement = (IUnknown*)NULL; hr = pHtmlStart->IsRightOf(pHtmlEnd, &bRight); if( hr ) return E_FAIL; if( bRight ) break; hr = pHtmlStart->CurrentScope( &pElement ); if (hr) return E_FAIL; hr = pElement->get_tagName( &bstrTagName ); if (hr) return E_FAIL; hr = pElement->get_innerText( &bstrinnerText); if (hr) return E_FAIL; // CString ele_msg; PrintElement( pElement, ele_msg); msg += ele_msg; //move to next element hr = 
pHtmlStart->MoveUnit(MOVEUNIT_NEXTBLOCK); if (hr) return E_FAIL; } return S_OK; } HRESULT GetMarkupServices(IDispatch *pDocument, IMarkupServices ** pMarkupServices) { CComQIPtr<IHTMLDOCUMENT2> pDoc; CComQIPtr<IHTMLWINDOW2> pWindow; CComQIPtr<ISERVICEPROVIDER> pService; HRESULT hr = S_OK; pDoc = pDocument; if( ! pDoc) return E_FAIL; hr = pDoc->get_parentWindow( &pWindow ); if (hr) return E_FAIL; pService = pWindow; if ( !pService ) return E_FAIL; hr = pService->QueryService( CLSID_HTMLDocument, IID_IMarkupServices, (void **) pMarkupServices); if (hr) return E_FAIL; return S_OK; } //////////////////////////////////////// HRESULT PrintElement(IHTMLElement *pElement, CString &msg) { CComQIPtr<IHTMLIMGELEMENT> pImg( pElement ); CComBSTR bstrTagName; CComBSTR bstrinnerText; CComBSTR bstrSrc; HRESULT hr = S_OK; hr = pElement->get_tagName( &bstrTagName ); if (FAILED(hr)) return hr; hr = pElement->get_innerText( &bstrinnerText); if (FAILED(hr)) return hr; if( pImg ) { hr = pImg->get_src( &bstrSrc ); if (FAILED(hr)) return hr; } // CString ele_msg; ele_msg.Format("tagName=%S", bstrTagName); if( bstrinnerText.Length()) { ele_msg += ",innerText="; ele_msg += CString(bstrinnerText); } if( bstrSrc.Length()) { ele_msg += ",src="; ele_msg += CString(bstrSrc); } ele_msg += "\n"; //enum childres CComQIPtr<IHTMLELEMENTCOLLECTION> pAll; hr = pElement->get_all( (IDispatch**)& pAll ); if (FAILED(hr)) return hr; long count = 0; hr = pAll->get_length( & count ); if (FAILED(hr)) return hr; for(long i=0; i<COUNT; CComQIPtr<IDispatch index(i); CComVariant { i++)> pdisp; CComQIPtr<IHTMLELEMENT> pitem; hr = pAll->item( index, index, & pdisp ); if (FAILED(hr)) return hr; // pitem = pdisp; if( !pitem ) continue; PrintElement( pitem, ele_msg); // } // msg += ele_msg; // TRACE0( ele_msg ); return S_OK; }