Click here to Skip to main content
14,668,361 members
Articles » Desktop Development » Shell and IE programming » IE Programming
Article
Posted 22 Mar 2005

Stats

67.5K views
1.1K downloads
46 bookmarked

How to identify the different elements in the selection of web pages?

Rate this:
4.64 (4 votes)
Please Sign up or sign in to vote.
4.64 (4 votes)
26 Apr 2005
How to identify the different elements in the selection of web pages?

Image 1

Introduction

First, I get an IHTMLDocument2 interface for the browser component and then read its selection property to obtain an IHTMLSelectionObject interface. Then came the tough part: actually parsing the selection. I had assumed that if I could create a control range using the selection's createRange method, I would be able to get a list with the HTML tags and their attributes neatly separated.

Now, using IMarkupServices, I can enumerate all the elements in the selected portion of the web page. Here is the code:

//File:IESelection.CPP

#include <afxwin.h>
#include <afxdisp.h>

#include <atlbase.h>
//You may derive a class from CComModule and use it
//if you want to override
//something, but do not change the name of _Module
extern CComModule _Module;
#include <atlcom.h>

///
#include <mshtml.h>
#include <MsHtmcid.h>

///
// Obtains the IMarkupServices interface associated with the given document.
HRESULT GetMarkupServices(IDispatch *pDocument, 
        IMarkupServices ** pMarkupServices );
// Walks the document's current selection and writes a textual description
// of the elements it finds into msg.
HRESULT EnumSelectionElements(IDispatch *pDocument, CString &msg);

// Appends a one-line description of pElement (tag name, inner text, image
// source) and of its nested elements to msg.
HRESULT PrintElement(IHTMLElement *pElement, CString &msg);
///
/// Enumerates the elements touched by the document's current selection.
///
/// A pair of markup pointers is positioned on the selection's text range and
/// the start pointer is advanced block by block until it passes the end
/// pointer. The element in scope at each stop is described via PrintElement
/// and the description is appended to msg.
///
/// @param pDocument  The HTML document (must expose IHTMLDocument2).
/// @param msg        Receives the accumulated description; cleared on entry.
/// @return S_OK on success, E_FAIL if any step fails or the selection is
///         not a text range.
HRESULT EnumSelectionElements(IDispatch * pDocument, CString &msg)
{
    HRESULT hr = S_OK;

    msg.Empty();

    CComQIPtr<IHTMLDocument2> pDoc(pDocument);
    if (!pDoc)
        return E_FAIL;

    CComPtr<IHTMLSelectionObject> pSel;
    hr = pDoc->get_selection(&pSel);
    if (hr != S_OK || !pSel)
        return E_FAIL;

    // createRange hands back an IDispatch that is either a text range or a
    // control range; query for the text range instead of blindly casting.
    CComPtr<IDispatch> pRangeDisp;
    hr = pSel->createRange(&pRangeDisp);
    if (hr != S_OK || !pRangeDisp)
        return E_FAIL;

    CComQIPtr<IHTMLTxtRange> pRange(pRangeDisp);
    if (!pRange)
        return E_FAIL;

    CComPtr<IMarkupServices> pMarkupServices;
    hr = GetMarkupServices(pDocument, &pMarkupServices);
    if (hr != S_OK || !pMarkupServices)
        return E_FAIL;

    CComPtr<IMarkupPointer> pHtmlStart;
    hr = pMarkupServices->CreateMarkupPointer(&pHtmlStart);
    if (hr != S_OK || !pHtmlStart)
        return E_FAIL;

    CComPtr<IMarkupPointer> pHtmlEnd;
    hr = pMarkupServices->CreateMarkupPointer(&pHtmlEnd);
    if (hr != S_OK || !pHtmlEnd)
        return E_FAIL;

    // Remembers where the start pointer was before each move so that a move
    // which makes no progress can be detected.
    CComPtr<IMarkupPointer> pBeforeMove;
    hr = pMarkupServices->CreateMarkupPointer(&pBeforeMove);
    if (hr != S_OK || !pBeforeMove)
        return E_FAIL;

    hr = pMarkupServices->MovePointersToRange(pRange, pHtmlStart, pHtmlEnd);
    if (hr != S_OK)
        return E_FAIL;

    for (;;)
    {
        BOOL bRight = FALSE;
        hr = pHtmlStart->IsRightOf(pHtmlEnd, &bRight);
        if (hr != S_OK)
            return E_FAIL;
        if (bRight)
            break;

        // A fresh smart pointer per iteration avoids overwriting (and
        // leaking) the reference obtained on the previous pass.
        CComPtr<IHTMLElement> pElement;
        hr = pHtmlStart->CurrentScope(&pElement);
        if (hr != S_OK)
            return E_FAIL;

        // The pointer may not be inside any element's scope; skip it then.
        if (pElement)
        {
            CString ele_msg;
            hr = PrintElement(pElement, ele_msg);
            if (FAILED(hr))
                return E_FAIL;
            msg += ele_msg;
        }

        // Move to the next block, bailing out if the pointer cannot advance
        // (e.g. an empty selection at the end of the document); otherwise
        // IsRightOf would never become true and the loop would not end.
        hr = pBeforeMove->MoveToPointer(pHtmlStart);
        if (hr != S_OK)
            return E_FAIL;

        hr = pHtmlStart->MoveUnit(MOVEUNIT_NEXTBLOCK);
        if (hr != S_OK)
            return E_FAIL;

        BOOL bStuck = FALSE;
        hr = pBeforeMove->IsEqualTo(pHtmlStart, &bStuck);
        if (hr != S_OK)
            return E_FAIL;
        if (bStuck)
            break;
    }

    return S_OK;
}

/// Retrieves the IMarkupServices interface for an HTML document.
///
/// The document's parent window is asked for the service first (via
/// IServiceProvider::QueryService). If that route is unavailable, the
/// document object itself is queried for IMarkupServices.
///
/// @param pDocument        The HTML document (must expose IHTMLDocument2).
/// @param pMarkupServices  Receives an AddRef'ed IMarkupServices pointer on
///                         success; set to NULL on failure.
/// @return S_OK on success, E_POINTER if pMarkupServices is NULL, otherwise
///         E_FAIL.
HRESULT GetMarkupServices(IDispatch *pDocument, 
        IMarkupServices ** pMarkupServices)
{
    if (!pMarkupServices)
        return E_POINTER;
    *pMarkupServices = NULL;

    CComQIPtr<IHTMLDocument2> pDoc(pDocument);
    if (!pDoc)
        return E_FAIL;

    CComPtr<IMarkupServices> spServices;

    // Preferred route: ask the parent window's service provider.
    CComPtr<IHTMLWindow2> pWindow;
    HRESULT hr = pDoc->get_parentWindow(&pWindow);
    if (hr == S_OK && pWindow)
    {
        CComQIPtr<IServiceProvider> pService(pWindow);
        if (pService)
        {
            hr = pService->QueryService(CLSID_HTMLDocument,
                IID_IMarkupServices,
                reinterpret_cast<void **>(&spServices));
            if (hr != S_OK)
                spServices.Release();
        }
    }

    // Fallback: the document object itself can be queried for the interface.
    if (!spServices)
    {
        hr = pDoc.QueryInterface(&spServices);
        if (FAILED(hr) || !spServices)
            return E_FAIL;
    }

    *pMarkupServices = spServices.Detach();
    return S_OK;
}

////////////////////////////////////////
/// Appends a textual description of an element and its nested elements to msg.
///
/// The description line has the form
///   tagName=<tag>[,innerText=<text>][,src=<url>]\n
/// where the src part is only produced for image elements. Each direct child
/// is then described recursively, so every descendant appears exactly once.
/// The complete text is also written to the debug trace.
///
/// @param pElement  Element to describe.
/// @param msg       String the description is appended to.
/// @return S_OK on success, E_POINTER if pElement is NULL, or the failing
///         HRESULT from the underlying MSHTML call.
HRESULT PrintElement(IHTMLElement *pElement, CString &msg)
{
   if (!pElement)
      return E_POINTER;

   CComBSTR bstrTagName;
   HRESULT hr = pElement->get_tagName(&bstrTagName);
   if (FAILED(hr))
      return hr;

   CComBSTR bstrinnerText;
   hr = pElement->get_innerText(&bstrinnerText);
   if (FAILED(hr))
      return hr;

   // Only image elements carry a src attribute worth reporting.
   CComBSTR bstrSrc;
   CComQIPtr<IHTMLImgElement> pImg(pElement);
   if (pImg)
   {
      hr = pImg->get_src(&bstrSrc);
      if (FAILED(hr))
         return hr;
   }

   // Build the line by concatenation rather than passing a CComBSTR object
   // through a printf-style variadic call.
   CString ele_msg("tagName=");
   ele_msg += CString(bstrTagName);
   if (bstrinnerText.Length())
   {
      ele_msg += ",innerText=";
      ele_msg += CString(bstrinnerText);
   }
   if (bstrSrc.Length())
   {
      ele_msg += ",src=";
      ele_msg += CString(bstrSrc);
   }
   ele_msg += "\n";

   // Enumerate direct children only: the recursion below reaches deeper
   // levels, so walking the full descendant list here would report nested
   // elements more than once.
   CComPtr<IDispatch> pChildrenDisp;
   hr = pElement->get_children(&pChildrenDisp);
   if (FAILED(hr))
      return hr;

   CComQIPtr<IHTMLElementCollection> pChildren(pChildrenDisp);
   if (pChildren)
   {
      long count = 0;
      hr = pChildren->get_length(&count);
      if (FAILED(hr))
         return hr;

      for (long i = 0; i < count; ++i)
      {
         CComVariant index(i);
         CComPtr<IDispatch> pdisp;
         hr = pChildren->item(index, index, &pdisp);
         if (FAILED(hr))
            return hr;

         CComQIPtr<IHTMLElement> pitem(pdisp);
         if (!pitem)
            continue;

         // Best effort: a child that cannot be described is skipped and
         // does not abort the description of its siblings.
         PrintElement(pitem, ele_msg);
      }
   }

   msg += ele_msg;

   TRACE(_T("%s"), static_cast<LPCTSTR>(ele_msg));
   return S_OK;
}

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

zhaowd2001
China China
No Biography provided

Comments and Discussions

 
Generalbug - if nothing selected, it gets blocked Pin
kamnas13-May-08 7:59
Memberkamnas13-May-08 7:59 
QuestionIMarkupServices Pin
AnithaE26-Feb-08 2:11
MemberAnithaE26-Feb-08 2:11 
Questionsource code format? Pin
codediscuss3-Feb-07 5:48
Membercodediscuss3-Feb-07 5:48 
GeneralStrange behavior: Overlapping tags [modified] Pin
Leonhardt Wille26-Apr-06 5:45
MemberLeonhardt Wille26-Apr-06 5:45 
Generaluse MOVEUNIT_NEXTWORDEND when calling MoveUnit Pin
zhaowd200125-Sep-05 20:48
Memberzhaowd200125-Sep-05 20:48 
GeneralWon't get selected img tags Pin
eshipman26-Apr-05 12:07
Membereshipman26-Apr-05 12:07 
Know how to get those from the selection?
GeneralRe: Won't get selected img tags Pin
zhaowd200126-Apr-05 17:30
Memberzhaowd200126-Apr-05 17:30 
GeneralJust use Firefox Pin
Anonymous31-Mar-05 16:23
MemberAnonymous31-Mar-05 16:23 
GeneralRe: Just use Firefox Pin
Leonhardt Wille1-Apr-05 10:09
MemberLeonhardt Wille1-Apr-05 10:09 
GeneralpWindow doesn't have IMarkupServices interface Pin
Leonhardt Wille31-Mar-05 4:23
MemberLeonhardt Wille31-Mar-05 4:23 
GeneralRe: pWindow doesn't have IMarkupServices interface Pin
zhaowd200131-Mar-05 16:01
Memberzhaowd200131-Mar-05 16:01 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.