再放200分!关于使用mshtml.dll解析网页的问题 ( 积分: 200 )

  • 主题发起人 主题发起人 shang_yan
  • 开始时间 开始时间
S

shang_yan

Unregistered / Unconfirmed
GUEST, unregistred user!
我在http://www.euromind.com/iedelphi上下载了IEParser控件,来解析网页以获得其中的网址。该控件是直接调用了mshtml.dll中的对象,而不是调用浏览器控件TWebBrowser。因此,当解析有弹出式窗口的html文档时,会弹出窗口。
这个控件根本没有OnNewWindow2事件,所以我找不到禁止弹出式窗口的任何办法,哪个高手能指点一下?
 
我在http://www.euromind.com/iedelphi上下载了IEParser控件,来解析网页以获得其中的网址。该控件是直接调用了mshtml.dll中的对象,而不是调用浏览器控件TWebBrowser。因此,当解析有弹出式窗口的html文档时,会弹出窗口。
这个控件根本没有OnNewWindow2事件,所以我找不到禁止弹出式窗口的任何办法,哪个高手能指点一下?
 
没有一个人回答,可能是我表述不清!我还是贴出部分源码吧!
我想知道的是,经过TIEParser.execute后,我如何能实现TWebBrowser中的OnNewWindow2那样的事件?
我想应该是有这个办法的,因为TwebBrowser和下面的代码都是基于IE中的MSHtml.dll的。
unit IEParser;

interface

uses
mshtml, Dialogs, Sysutils, Urlmon, ActiveX, Windows, Messages, Classes;


implementation


procedure TIEParser.Execute;//这是控件的初始化部分,在这里建立了Class_HtmlDocument的实例
var
boolWorking: Boolean;
E: IhtmlElement;
X, C: Integer;
ConnectionPoint: IConnectionPoint;
OleChar: array[0..MAX_PATH - 1] of TOleChar;
Msg: TMsg;
hr: HRESULT;
begin
//下面建立了Class_HtmlDocument的实例
CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IHTMLDocument2, Doc);
_Url := @OleChar;
MultibytetoWideChar(CP_ACP, 0, PChar(FUrl), -1, _Url, sizeof(OleChar));
(Doc as IOleObject).SetClientSite(self as IOleClientsite);
(Doc as IOleControl).OnAmbientPropertyChange(DISPID_AMBIENT_DLCONTROL);
(Doc as IConnectionPointContainer).FindConnectionPoint(IpropertyNotifySink, ConnectionPoint);
ConnectionPoint.Advise(Self as IPropertyNotifySink, C);
。。。
end;
 
连接到DWebBrowserEvents2就有了
 
这个概念我大概知道一点,但具体那要怎么连接呢?能否给点具体代码?
 
禁止弹出式窗口的办法:用bho
如果想看全部代码的话,到www.2ccc.com
查找:秋风网页广告拦截器1.2

unit uKiller;

{$WARN SYMBOL_PLATFORM OFF}

interface

uses
Windows, ActiveX, Classes, ComObj, Shdocvw, Dialogs, Variants;

type
TAdKillerBHO = class(TComObject, IObjectWithSite, IDispatch)
private
FIESite: IUnknown;
FIE: IWebBrowser2;
FCPC: IConnectionPointContainer;
FCP: IConnectionPoint;
FCookie: Integer;
protected
function SetSite(const pUnkSite: IUnknown): HResult; stdcall;
function GetSite(const riid: TIID; out site: IUnknown): HResult; stdcall;
function GetTypeInfoCount(out Count: Integer): HResult; stdcall;
function GetTypeInfo(Index, LocaleID: Integer; out TypeInfo): HResult;
stdcall;
function GetIDsOfNames(const IID: TGUID; Names: Pointer;
NameCount, LocaleID: Integer; DispIDs: Pointer): HResult; stdcall;
function Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer;
Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult;
stdcall;
procedure DoBeforeNavigate2(const pDisp: IDispatch; var URL: OleVariant; var Flags: OleVariant;
var TargetFrameName: OleVariant; var PostData: OleVariant;
var Headers: OleVariant; var Cancel: WordBool);
end;

const
AdKillerBHO: TGUID = '{A692062A-11A1-461B-BE98-B520F01F96FC}';

implementation

uses ComServ, Sysutils, ComConst;

var
WM_ADKILLER: Cardinal;

{ TAdKillerBHO }

procedure TAdKillerBHO.DoBeforeNavigate2(const pDisp: IDispatch; var URL,
Flags, TargetFrameName, PostData, Headers: OleVariant; var Cancel: WordBool);
var
hOwner: THandle;
begin
if FIE.ToolBar = 0 then
begin
hOwner := FindWindow('TfrmAdKiller', PAnsiChar('秋风网页广告拦截器1.2'));
if hOwner <> 0 then
begin
FIE.Quit;
PostMessage(hOwner, WM_ADKILLER, 0, GlobalAddAtom(PAnsiChar(VarToStrDef(URL, ''))));
end;
end;
end;

function TAdKillerBHO.GetIDsOfNames(const IID: TGUID; Names: Pointer;
NameCount, LocaleID: Integer; DispIDs: Pointer): HResult;
begin
Result := E_NOTIMPL;
end;

function TAdKillerBHO.GetSite(const riid: TIID;
out site: IInterface): HResult;
begin
if Supports(FIESite, riid, site) then
Result := S_OK
else
Result := E_NOINTERFACE;
end;

function TAdKillerBHO.GetTypeInfo(Index, LocaleID: Integer;
out TypeInfo): HResult;
begin
Result := E_NOTIMPL;
pointer(TypeInfo) := nil;
end;

function TAdKillerBHO.GetTypeInfoCount(out Count: Integer): HResult;
begin
Result := E_NOTIMPL;
Count := 0;
end;

procedure BuildPositionalDispIds(pDispIds: PDispIdList; const dps: TDispParams);
var
i: integer;
begin
Assert(pDispIds <> nil);
for i := 0 to dps.cArgs - 1 do
pDispIds^ := dps.cArgs - 1 - i;
if (dps.cNamedArgs <= 0) then
Exit;
for i := 0 to dps.cNamedArgs - 1 do
pDispIds^[dps.rgdispidNamedArgs^] := i;
end;

function TAdKillerBHO.Invoke(DispID: Integer; const IID: TGUID;
LocaleID: Integer; Flags: Word; var Params; VarResult, ExcepInfo,
ArgErr: Pointer): HResult;
var
dps: TDispParams absolute Params;
bHasParams: boolean;
pDispIds: PDispIdList;
iDispIdsSize: integer;
begin
pDispIds := nil;
iDispIdsSize := 0;
bHasParams := (dps.cArgs > 0);
if (bHasParams) then
begin
iDispIdsSize := dps.cArgs * SizeOf(TDispId);
GetMem(pDispIds, iDispIdsSize);
end;
try
if (bHasParams) then
BuildPositionalDispIds(pDispIds, dps);
Result := S_OK;
case DispId of
250:
DoBeforeNavigate2(IDispatch(dps.rgvarg^[pDispIds^[0]].dispval),
POleVariant(dps.rgvarg^[pDispIds^[1]].pvarval)^,
POleVariant(dps.rgvarg^[pDispIds^[2]].pvarval)^,
POleVariant(dps.rgvarg^[pDispIds^[3]].pvarval)^,
POleVariant(dps.rgvarg^[pDispIds^[4]].pvarval)^,
POleVariant(dps.rgvarg^[pDispIds^[5]].pvarval)^,
dps.rgvarg^[pDispIds^[6]].pbool^);
253:
FCP.Unadvise(FCookie);
else
Result := DISP_E_MEMBERNOTFOUND;
end;
finally
if (bHasParams) then FreeMem(pDispIds, iDispIdsSize);
end;
end;

function TAdKillerBHO.SetSite(const pUnkSite: IInterface): HResult;
begin
Result := E_FAIL;
FIESite := pUnkSite;
if not Supports(FIESite, IWebBrowser2, FIE) then Exit;
if not Supports(FIE, IConnectionPointContainer, FCPC) then Exit;
FCPC.FindConnectionPoint(DWebBrowserEvents2, FCP);
FCP.Advise(Self, FCookie);
Result := S_OK;
end;

procedure DeleteRegKeyValue(Root: DWORD; Key: string; ValueName: string = '');
var
KeyHandle: HKEY;
begin
if ValueName = '' then
RegDeleteKey(Root, PChar(Key));
if RegOpenKey(Root, PChar(Key), KeyHandle) = ERROR_SUCCESS then
try
RegDeleteValue(KeyHandle, PChar(ValueName));
finally
RegCloseKey(KeyHandle);
end;
end;

procedure CreateRegKeyValue(Root: DWORD; const Key, ValueName, Value: string);
var
Handle: HKey;
Status, Disposition: Integer;
begin
Status := RegCreateKeyEx(ROOT, PChar(Key), 0, '',
REG_OPTION_NON_VOLATILE, KEY_READ or KEY_WRITE, nil, Handle,
@Disposition);
if Status = 0 then
begin
Status := RegSetValueEx(Handle, PChar(ValueName), 0, REG_SZ,
PChar(Value), Length(Value) + 1);
RegCloseKey(Handle);
end;
if Status <> 0 then
raise EOleRegistrationError.CreateRes(@SCreateRegKeyError);
end;

type
TIEAdvBHOFactory = class(TComObjectFactory)
public
procedure UpdateRegistry(Register: Boolean); override;
end;

{ TIEAdvBHOFactory }

procedure TIEAdvBHOFactory.UpdateRegistry(Register: Boolean);
begin
inherited;
if Register then
CreateRegKeyValue(HKEY_LOCAL_MACHINE, 'Software/Microsoft/Windows/CurrentVersion/explorer/Browser Helper Objects/' + GuidToString(ClassID), '', '')
else
DeleteRegKeyValue(HKEY_LOCAL_MACHINE, 'Software/Microsoft/Windows/CurrentVersion/explorer/Browser Helper Objects/' + GuidToString(ClassID), '');
end;

initialization
TIEAdvBHOFactory.Create(ComServer, TAdKillerBHO, AdKillerBHO,
'TAdKillerBHO', '', ciMultiInstance, tmApartment);
WM_ADKILLER := RegisterWindowMessage('AdKiller');
end.
 
晕,你上面不是贴了连接到IpropertyNotifySink的代码?IpropertyNotifySink换成DWebBrowserEvents2就OK了。
 
To satanmokey:
虽然我帖出了连接到IpropertyNotyfySink的代码,但我是抄得别人的,我并不十分明白它的意思。只是通过看有关Com的书,略微知道一点点含义。
具体要怎么换成DWebBrowserEvents2呢
 
DWebBrowserEvents2是一个连接点,你找书看看COM的连接点的知识就知道了。

另外你的问题好像可以把doc.designMode :='off'来解决
 
大家还有什么高见?如果没有我就散分了
 
关键是Invoke里面的DoBeforeNavigate2,这里是网页转向的
 
我仍没有办法解决我的问题,烦请过路的高手再具体指点一下。
 
楼主是说这个代码吗?其实和楼上几位说的差不多的,Delphi也是封装了哪些东西而已,你看不是导入了MSHTML_LIB了吗?只是活的IHtmlDocument的方式不同而已,我们现在采用WebBrower也是为了活的那个接口

{ ************************************** }
{ UI_Less by Per Linds?Larsen }
{ }
{ UPDATES: }
{ http://www.euromind.com/ieDelphi }
{ }
{ For Delphi 4 & 5 }
{ }
{ lindsoe@po.ia.dk }
{ }
{ based on MS-demo WALKALL.CPP }
{****************************************}


{***********************************************
HISTORY

july 31, 1999 : Fixed some major bugs in INVOKE

************************************************}





{ How to use example ....

Uses UI_less;

procedure TForm1.Button1Click(Sender: TObject);
var
sh: TUILess;
begin
sh := TUILess.Create(nil);
GetAnchorList(sh.get('http://www.microsoft.com'),memo1.lines);
end;

}

unit UI_Less;

interface

uses
MsHtml_tlb, Urlmon, ActiveX, Windows, Messages, Classes;

const
WM_USER_STARTWALKING = WM_USER + 1;
DISPID_AMBIENT_DLCONTROL = (-5512);
READYSTATE_COMPLETE = $00000004;

DLCTL_DLIMAGES = $00000010;
DLCTL_VIDEOS = $00000020;
DLCTL_BGSOUNDS = $00000040;
DLCTL_NO_SCRIPTS = $00000080;
DLCTL_NO_JAVA = $00000100;
DLCTL_NO_RUNACTIVEXCTLS = $00000200;
DLCTL_NO_DLACTIVEXCTLS = $00000400;
DLCTL_DOWNLOADONLY = $00000800;
DLCTL_NO_FRAMEDOWNLOAD = $00001000;
DLCTL_RESYNCHRONIZE = $00002000;
DLCTL_PRAGMA_NO_CACHE = $00004000;
DLCTL_NO_BEHAVIORS = $00008000;
DLCTL_NO_METACHARSET = $00010000;
DLCTL_URL_ENCODING_DISABLE_UTF8 = $00020000;
DLCTL_URL_ENCODING_ENABLE_UTF8 = $00040000;
DLCTL_FORCEOFFLINE = $10000000;
DLCTL_NO_CLIENTPULL = $20000000;
DLCTL_SILENT = $40000000;
DLCTL_OFFLINEIFNOTCONNECTED = $80000000;
DLCTL_OFFLINE = DLCTL_OFFLINEIFNOTCONNECTED;


type


TUILess = class(TComponent, IUnknown,IDispatch, IPropertyNotifySink, IOleClientSite)
protected
/// IDISPATCH
function Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer;
Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult; stdcall;
/// IPROPERTYNOTIFYSINK
function OnChanged(dispid: TDispID): HResult; stdcall;
function OnRequestEdit(dispid: TDispID): HResult; stdcall;
/// IOLECLIENTSITE
function SaveObject: HResult; stdcall;
function GetMoniker(dwAssign: Longint; dwWhichMoniker: Longint;
out mk: IMoniker): HResult; stdcall;
function GetContainer(out container: IOleContainer): HResult; stdcall;
function ShowObject: HResult; stdcall;
function OnShowWindow(fShow: BOOL): HResult; stdcall;
function RequestNewObjectLayout: HResult; stdcall;
///
function LoadUrlFromMoniker: HResult;
function LoadUrlFromFile: HResult;
// * We only use LoadUrlFromMoniker, but we could use LoadUrlFromFile instead.

public
function Get(URL: PWidechar): IHTMLELEMENTCollection;
end;


/// Utils
procedure GetAnchorList(IC: IHTMLElementCollection; Anchorlist: TStrings);
procedure GetImageList(IC: IHTMLElementCollection; ImageList: TStrings);



implementation


var
Doc: IhtmlDocument2;
_URL: PwideChar;


/// CORE ---->>>>>>>>>

function TUILess.Get(Url: Pwidechar): IHtmlElementCollection;
var
Cookie: Integer;
CP: IConnectionPoint;
OleObject: IOleObject;
OleControl: IOleControl;
CPC : IConnectionPointContainer;
Msg: TMsg;
hr: HRESULT;
begin
_Url:=Url;
CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, Doc);
OleObject:=Doc as IOleObject;
OleObject.SetClientSite(self);
OleControl:=Doc as IOleControl;
OleControl.OnAmbientPropertyChange(DISPID_AMBIENT_DLCONTROL);
CPC:=Doc as IConnectionPointContainer;
CPC.FindConnectionPoint(IpropertyNotifySink, CP);
CP.Advise(self, Cookie);
HR := LoadUrlFromMoniker; // alternative: Hr:= LoadUrlFromFile;
if ((SUCCEEDED(HR)) or (HR = E_PENDING)) then
while (GetMessage(msg, 0, 0, 0)) do
if ((msg.message = WM_USER_STARTWALKING) and (msg.hwnd = 0)) then
begin
PostQuitMessage(0);
result := Doc.Get_all;
end else DispatchMessage(msg);
end;




function TUILess.Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer;
Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult;
var
I: Integer;
begin
if DISPID_AMBIENT_DLCONTROL = DispId then begin
I := DLCTL_DOWNLOADONLY + DLCTL_NO_SCRIPTS +
DLCTL_NO_JAVA + DLCTL_NO_DLACTIVEXCTLS +
DLCTL_NO_RUNACTIVEXCTLS;
PVariant(VarResult)^ := I;
Result := S_OK;
end else
Result := DISP_E_MEMBERNOTFOUND;
end;



function TUILess.OnChanged(dispid: TDispID): HResult;
var
dp: TDispParams;
vResult: OleVariant;
begin
if (DISPID_READYSTATE = Dispid) then
if SUCCEEDED((Doc as Ihtmldocument2).Invoke(DISPID_READYSTATE, GUID_null,
LOCALE_SYSTEM_DEFAULT, DISPATCH_PROPERTYGET, dp, @vresult, nil, nil)) then
if Integer(vresult) = READYSTATE_COMPLETE then
PostThreadMessage(GetCurrentThreadId(), WM_USER_STARTWALKING, 0, 0);
end;

function TUILess.LoadUrlFromMoniker: HResult;
var
Moniker: IMoniker;
BindCtx: IBindCTX;
PM: IPersistMoniker;
begin
createURLMoniker( nil, _Url, Moniker);
CreateBindCtx(0, BindCtx);
PM:=Doc as IPersistMoniker;
Result := PM.Load(LongBool(0), Moniker, BindCtx, STGM_READ)
end;

function TUILess.LoadUrlFromFile: HResult;
var
PF: IPersistfile;
begin
PF:=Doc as IPersistfile;
Result := PF.Load(_URL, 0);
end;


/// UTILILIES ---------- >>>>>>>>>>>>>>>>>>>>>



procedure GetImageList(IC: IHtmlElementCollection; ImageList: TStrings);
var
Image: IHTMLImgElement;
Disp: IDispatch;
x: Integer;
begin
if IC <> nil then begin
for x := 0 to IC.length - 1 do begin
Disp := IC.item(x, 0);
if SUCCEEDED(Disp.QueryInterface(IHTMLImgElement, Image))
then ImageList.add(Image.src);
end;
end;
end;


procedure GetAnchorList(Ic: IHTMLElementCollection; Anchorlist: TStrings);
var
Anchor: IHTMLAnchorElement;
Disp: IDispatch;
x: Integer;
begin
if IC <> nil then begin
for x := 0 to IC.length - 1 do begin
Disp := IC.item(x, 0);
if SUCCEEDED(Disp.QueryInterface(IHTMLAnchorElement, Anchor))
and (anchor.href <> '')
then Anchorlist.add(Anchor.href);
end;
end;
end;



/// Don't Care ------>>>>>>>>>>>


function TUILess.OnRequestEdit(dispid: TDispID): HResult;
begin
RESULT := E_NOTIMPL;
end;

function TUILess.SaveObject: HResult;
begin
result := E_NOTIMPL;
end;

function TUILess.GetMoniker(dwAssign: Longint; dwWhichMoniker: Longint;
out mk: IMoniker): HResult;
begin
result := E_NOTIMPL;
end;

function TUILess.GetContainer(out container: IOleContainer): HResult;
begin
result := E_NOTIMPL;
end;

function TUILess.ShowObject: HResult;
begin
result := E_NOTIMPL;
end;

function TUILess.OnShowWindow(fShow: BOOL): HResult;
begin
result := E_NOTIMPL;
end;

function TUILess.RequestNewObjectLayout: HResult;
begin
result := E_NOTIMPL;
end;

end.
 
to chenybin:
我用的正是你的代码。一般情况下,解析网页时也很正常。只是上述代码在解析有弹出式窗口的网页时,会弹出那个窗口,比如用它来解析www.163.com。用上述代码我无法禁止弹出式窗口。而且也不知道要如何改进上述代码。
 
弹出拦截用WebBrower可以做到,在OnBeforeNavite2里面拦截,你做一下看看,前几天回答了一个问题,就是防止弹出下载窗口的,

上面的代码我没有运行过,不知道怎么处理你说的情况,惭愧
 
如果只是在TWebbrowser控件中禁止弹出式窗口,当然很容易做得到。而我却是直接调用的mshtml.dll,里面没有OnNewWindow2事件,我也不知道如何来实现,所以问题没法解决。
 
问题没有解决。但仍散分!
 
多人接受答案了。
 
其实TWebBrower是通过TInternetExplorer的InvokeEvent事件来处理的

而mshtml里面处理的只是html的一些东西,也就是下面的东西
HTMLWindowEvents2 = dispinterface
['{3050F625-98B5-11CF-BB82-00AA00BDCE0B}']
procedure onload(const pEvtObj: IHTMLEventObj); dispid 1003;
procedure onunload(const pEvtObj: IHTMLEventObj); dispid 1008;
function onhelp(const pEvtObj: IHTMLEventObj): WordBool; dispid -2147418102;
procedure onfocus(const pEvtObj: IHTMLEventObj); dispid -2147418111;
procedure onblur(const pEvtObj: IHTMLEventObj); dispid -2147418112;
procedure onerror(const description: WideString; const url: WideString; line: Integer); dispid 1002;
procedure onresize(const pEvtObj: IHTMLEventObj); dispid 1016;
procedure onscroll(const pEvtObj: IHTMLEventObj); dispid 1014;
procedure onbeforeunload(const pEvtObj: IHTMLEventObj); dispid 1017;
procedure onbeforeprint(const pEvtObj: IHTMLEventObj); dispid 1024;
procedure onafterprint(const pEvtObj: IHTMLEventObj); dispid 1025;
end;

// *********************************************************************//
// DispIntf: HTMLWindowEvents
// Flags: (4112) Hidden Dispatchable
// GUID: {96A0A4E0-D062-11CF-94B6-00AA0060275C}
// *********************************************************************//
HTMLWindowEvents = dispinterface
['{96A0A4E0-D062-11CF-94B6-00AA0060275C}']
procedure onload; dispid 1003;
procedure onunload; dispid 1008;
function onhelp: WordBool; dispid -2147418102;
procedure onfocus; dispid -2147418111;
procedure onblur; dispid -2147418112;
procedure onerror(const description: WideString; const url: WideString; line: Integer); dispid 1002;
procedure onresize; dispid 1016;
procedure onscroll; dispid 1014;
procedure onbeforeunload; dispid 1017;
procedure onbeforeprint; dispid 1024;
procedure onafterprint; dispid 1025;
end;
里面没有关于新窗口的东西,所以我觉得用MSHTML似乎是不可能的


所以上面的代码中
function TUILess.Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer;
Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult;
var
I: Integer;
begin
if DISPID_AMBIENT_DLCONTROL = DispId then begin
I := DLCTL_DOWNLOADONLY + DLCTL_NO_SCRIPTS +
DLCTL_NO_JAVA + DLCTL_NO_DLACTIVEXCTLS +
DLCTL_NO_RUNACTIVEXCTLS;
PVariant(VarResult)^ := I;
Result := S_OK;
end else
Result := DISP_E_MEMBERNOTFOUND;
end;
如果dispid是251应该可以考虑用
251: if Assigned(FOnNewWindow2) then
FOnNewWindow2(Self, Params[0] (*var IDispatch*), Params[1] (*var WordBool*));
这样的方式,如果楼主没解决,看有借鉴意义没有,各人看法,仅供参考
 
后退
顶部