A
abencat
Unregistered / Unconfirmed
GUEST, unregistered user!
如何取得WebBrowser所打开的网页的源码,如下方法都达不到效果,还请高手指点一下
比如得到这个页面中的内容(包含图片和文字)
带多框架的网站页面 http://www.s1188.com/
我使用的方法
// Shows the inner HTML of the loaded document's body element in Memo1.
// Exits silently when no document is loaded, when the document does not
// expose IHTMLDocument2, or when the document has no body element.
// Note: this is the markup inside the body element of the top-level document
// only, not the complete page source.
procedure TForm1.Button3Click(Sender: TObject);
var
  IDoc: IHTMLDocument2; // IHTMLDocument2 is declared in the MSHTML unit
  BodyElem: IHTMLElement;
begin
  // Document can be unassigned (e.g. nothing navigated yet); calling
  // QueryInterface through a nil interface would raise an access violation.
  if not Assigned(WebBrowser1.Document) then Exit;
  WebBrowser1.Document.QueryInterface(IHTMLDocument2, IDoc);
  if IDoc = nil then Exit;
  BodyElem := IDoc.body;
  if BodyElem = nil then Exit;
  Memo1.Lines.Text := BodyElem.innerHTML;
end;
// Shows the plain text (innerText, markup stripped) of the loaded document's
// body element in Memo1.
// Exits silently when no document is loaded, when the document does not
// expose IHTMLDocument2, or when the document has no body element.
procedure TForm1.Button4Click(Sender: TObject);
var
  IDoc: IHTMLDocument2; // IHTMLDocument2 is declared in the MSHTML unit
  BodyElem: IHTMLElement;
begin
  // Document can be unassigned (e.g. nothing navigated yet); calling
  // QueryInterface through a nil interface would raise an access violation.
  if not Assigned(WebBrowser1.Document) then Exit;
  WebBrowser1.Document.QueryInterface(IHTMLDocument2, IDoc);
  if IDoc = nil then Exit;
  BodyElem := IDoc.body;
  if BodyElem = nil then Exit;
  Memo1.Lines.Text := BodyElem.innerText;
end;
//Memo1.Lines.Add(IHtmlDocument2(WebBrowser1.Document).Body.OuterHtml);
// Appends the HTML that the loaded document persists through
// IPersistStreamInit to Memo1.
procedure TForm1.Button5Click(Sender: TObject);

  // Loads Html into the document currently held by WebBrowser through
  // IPersistStreamInit. Does nothing when WebBrowser has no document.
  // Raises an OS error if the global memory block cannot be allocated or
  // locked, and an OLE error (via OleCheck) if any COM call fails.
  // Kept as a companion to GetHtml; it is not called by this handler.
  procedure SetHtml(const WebBrowser: TWebBrowser; const Html: string);
  var
    Raw: AnsiString;
    Stream: IStream;
    hHTMLText: HGLOBAL;
    pData: Pointer;
    psi: IPersistStreamInit;
  begin
    if not Assigned(WebBrowser.Document) then Exit;
    // Work on single-byte data so that byte counts and character counts
    // agree regardless of what "string" maps to in the compiler in use.
    Raw := AnsiString(Html);
    // CreateStreamOnHGlobal expects a movable block, hence GHND (movable +
    // zero-initialised) and GlobalLock instead of a fixed GPTR block.
    hHTMLText := GlobalAlloc(GHND, Length(Raw) + 1);
    if hHTMLText = 0 then RaiseLastWin32Error;
    try
      pData := GlobalLock(hHTMLText);
      if pData = nil then RaiseLastWin32Error;
      try
        CopyMemory(pData, PAnsiChar(Raw), Length(Raw));
      finally
        GlobalUnlock(hHTMLText);
      end;
      OleCheck(CreateStreamOnHGlobal(hHTMLText, True, Stream));
    except
      // The stream never took ownership, so free the block ourselves.
      GlobalFree(hHTMLText);
      raise;
    end;
    // From here on the stream owns the block (fDeleteOnRelease = True).
    // The stream's initial size is the (possibly rounded-up) allocation
    // size; trim it to the actual text length.
    OleCheck(Stream.SetSize(Length(Raw)));
    OleCheck(WebBrowser.Document.QueryInterface(IPersistStreamInit, psi));
    OleCheck(psi.InitNew);
    OleCheck(psi.Load(Stream));
    // Stream and psi are released automatically when they go out of scope.
  end;

  // Returns what the document held by WebBrowser writes when saved through
  // IPersistStreamInit, or '' when WebBrowser has no document or nothing
  // was written. Raises an OLE error (via OleCheck) if a COM call fails.
  // NOTE(review): the saved bytes are converted with the default ANSI
  // conversion; confirm this matches the encoding MSHTML writes for the page.
  function GetHtml(const WebBrowser: TWebBrowser): string;
  var
    Raw: AnsiString;
    Stream: IStream;
    StatStg: TStatStg;
    hHTMLText: HGLOBAL;
    pData: Pointer;
    psi: IPersistStreamInit;
  begin
    Result := '';
    if not Assigned(WebBrowser.Document) then Exit;
    OleCheck(WebBrowser.Document.QueryInterface(IPersistStreamInit, psi));
    // Let the stream allocate and grow its own memory instead of handing it
    // a fixed-size buffer that a large page could outgrow.
    OleCheck(CreateStreamOnHGlobal(0, True, Stream));
    OleCheck(psi.Save(Stream, False));
    // Ask the stream how many bytes were written rather than scanning for a
    // NUL terminator that Save is not known to write.
    OleCheck(Stream.Stat(StatStg, STATFLAG_NONAME));
    if StatStg.cbSize = 0 then Exit;
    // Fetch the handle only after Save: it is the stream's current block.
    OleCheck(GetHGlobalFromStream(Stream, hHTMLText));
    pData := GlobalLock(hHTMLText);
    if pData = nil then RaiseLastWin32Error;
    try
      SetString(Raw, PAnsiChar(pData), Integer(StatStg.cbSize));
    finally
      GlobalUnlock(hHTMLText);
    end;
    Result := string(Raw);
    // Stream and psi are released automatically when they go out of scope;
    // releasing the stream frees its memory block.
  end;

begin
  Memo1.Lines.Add(GetHtml(WebBrowser1));
end;
// Appends the inner HTML of the first element in the document's "all"
// collection to Memo1.
// Exits silently when no document is loaded, when the document does not
// expose IHTMLDocument2, or when the collection is empty.
procedure TForm1.Button6Click(Sender: TObject);
var
  doc: IHTMLDocument2;
  all: IHTMLElementCollection;
  item: OleVariant;
begin
  // Guard against a missing document instead of failing on doc.all.
  if not Assigned(WebBrowser1.Document) then Exit;
  WebBrowser1.Document.QueryInterface(IHTMLDocument2, doc);
  if doc = nil then Exit;
  all := doc.all;
  if (all = nil) or (all.length = 0) then Exit;
  // The original passed varEmpty as the second argument; that identifier is
  // the variant type code constant 0, so the call is item(0, 0).
  item := all.item(0, 0);
  // item.innerHTML is the source from <title> to </body>
  // item.outerHTML is the complete text
  Memo1.Lines.Add(item.innerHTML);
end;
这些方法都得不到实际的内容,只能得到如下结果:
<html>
<head>
<title>
Welcome 888crown</title>
<meta http-equiv="Content-Type" content="text/html;
charset=big5">
</head>
<frameset rows="*,0" frameborder="NO" border="0"
framespacing="0">
<frame name="SI2_mem_index" src="app/member/">
<frame name="SI2_func" scrolling="NO" noresize src="./ok.html">
<noscript><frame src=*></noscript>
</frameset>
<noframes>
<body bgcolor="#FFFFFF" text="#000000">
</body>
</noframes>
</html>
而不能同时得到框架中的图片和文字内容,还请指点
比如得到这个页面中的内容(包含图片和文字)
带多框架的网站页面 http://www.s1188.com/
我使用的方法
// Shows the inner HTML of the loaded document's body element in Memo1.
// Exits silently when no document is loaded, when the document does not
// expose IHTMLDocument2, or when the document has no body element.
// Note: this is the markup inside the body element of the top-level document
// only, not the complete page source.
procedure TForm1.Button3Click(Sender: TObject);
var
  IDoc: IHTMLDocument2; // IHTMLDocument2 is declared in the MSHTML unit
  BodyElem: IHTMLElement;
begin
  // Document can be unassigned (e.g. nothing navigated yet); calling
  // QueryInterface through a nil interface would raise an access violation.
  if not Assigned(WebBrowser1.Document) then Exit;
  WebBrowser1.Document.QueryInterface(IHTMLDocument2, IDoc);
  if IDoc = nil then Exit;
  BodyElem := IDoc.body;
  if BodyElem = nil then Exit;
  Memo1.Lines.Text := BodyElem.innerHTML;
end;
// Shows the plain text (innerText, markup stripped) of the loaded document's
// body element in Memo1.
// Exits silently when no document is loaded, when the document does not
// expose IHTMLDocument2, or when the document has no body element.
procedure TForm1.Button4Click(Sender: TObject);
var
  IDoc: IHTMLDocument2; // IHTMLDocument2 is declared in the MSHTML unit
  BodyElem: IHTMLElement;
begin
  // Document can be unassigned (e.g. nothing navigated yet); calling
  // QueryInterface through a nil interface would raise an access violation.
  if not Assigned(WebBrowser1.Document) then Exit;
  WebBrowser1.Document.QueryInterface(IHTMLDocument2, IDoc);
  if IDoc = nil then Exit;
  BodyElem := IDoc.body;
  if BodyElem = nil then Exit;
  Memo1.Lines.Text := BodyElem.innerText;
end;
//Memo1.Lines.Add(IHtmlDocument2(WebBrowser1.Document).Body.OuterHtml);
// Appends the HTML that the loaded document persists through
// IPersistStreamInit to Memo1.
procedure TForm1.Button5Click(Sender: TObject);

  // Loads Html into the document currently held by WebBrowser through
  // IPersistStreamInit. Does nothing when WebBrowser has no document.
  // Raises an OS error if the global memory block cannot be allocated or
  // locked, and an OLE error (via OleCheck) if any COM call fails.
  // Kept as a companion to GetHtml; it is not called by this handler.
  procedure SetHtml(const WebBrowser: TWebBrowser; const Html: string);
  var
    Raw: AnsiString;
    Stream: IStream;
    hHTMLText: HGLOBAL;
    pData: Pointer;
    psi: IPersistStreamInit;
  begin
    if not Assigned(WebBrowser.Document) then Exit;
    // Work on single-byte data so that byte counts and character counts
    // agree regardless of what "string" maps to in the compiler in use.
    Raw := AnsiString(Html);
    // CreateStreamOnHGlobal expects a movable block, hence GHND (movable +
    // zero-initialised) and GlobalLock instead of a fixed GPTR block.
    hHTMLText := GlobalAlloc(GHND, Length(Raw) + 1);
    if hHTMLText = 0 then RaiseLastWin32Error;
    try
      pData := GlobalLock(hHTMLText);
      if pData = nil then RaiseLastWin32Error;
      try
        CopyMemory(pData, PAnsiChar(Raw), Length(Raw));
      finally
        GlobalUnlock(hHTMLText);
      end;
      OleCheck(CreateStreamOnHGlobal(hHTMLText, True, Stream));
    except
      // The stream never took ownership, so free the block ourselves.
      GlobalFree(hHTMLText);
      raise;
    end;
    // From here on the stream owns the block (fDeleteOnRelease = True).
    // The stream's initial size is the (possibly rounded-up) allocation
    // size; trim it to the actual text length.
    OleCheck(Stream.SetSize(Length(Raw)));
    OleCheck(WebBrowser.Document.QueryInterface(IPersistStreamInit, psi));
    OleCheck(psi.InitNew);
    OleCheck(psi.Load(Stream));
    // Stream and psi are released automatically when they go out of scope.
  end;

  // Returns what the document held by WebBrowser writes when saved through
  // IPersistStreamInit, or '' when WebBrowser has no document or nothing
  // was written. Raises an OLE error (via OleCheck) if a COM call fails.
  // NOTE(review): the saved bytes are converted with the default ANSI
  // conversion; confirm this matches the encoding MSHTML writes for the page.
  function GetHtml(const WebBrowser: TWebBrowser): string;
  var
    Raw: AnsiString;
    Stream: IStream;
    StatStg: TStatStg;
    hHTMLText: HGLOBAL;
    pData: Pointer;
    psi: IPersistStreamInit;
  begin
    Result := '';
    if not Assigned(WebBrowser.Document) then Exit;
    OleCheck(WebBrowser.Document.QueryInterface(IPersistStreamInit, psi));
    // Let the stream allocate and grow its own memory instead of handing it
    // a fixed-size buffer that a large page could outgrow.
    OleCheck(CreateStreamOnHGlobal(0, True, Stream));
    OleCheck(psi.Save(Stream, False));
    // Ask the stream how many bytes were written rather than scanning for a
    // NUL terminator that Save is not known to write.
    OleCheck(Stream.Stat(StatStg, STATFLAG_NONAME));
    if StatStg.cbSize = 0 then Exit;
    // Fetch the handle only after Save: it is the stream's current block.
    OleCheck(GetHGlobalFromStream(Stream, hHTMLText));
    pData := GlobalLock(hHTMLText);
    if pData = nil then RaiseLastWin32Error;
    try
      SetString(Raw, PAnsiChar(pData), Integer(StatStg.cbSize));
    finally
      GlobalUnlock(hHTMLText);
    end;
    Result := string(Raw);
    // Stream and psi are released automatically when they go out of scope;
    // releasing the stream frees its memory block.
  end;

begin
  Memo1.Lines.Add(GetHtml(WebBrowser1));
end;
// Appends the inner HTML of the first element in the document's "all"
// collection to Memo1.
// Exits silently when no document is loaded, when the document does not
// expose IHTMLDocument2, or when the collection is empty.
procedure TForm1.Button6Click(Sender: TObject);
var
  doc: IHTMLDocument2;
  all: IHTMLElementCollection;
  item: OleVariant;
begin
  // Guard against a missing document instead of failing on doc.all.
  if not Assigned(WebBrowser1.Document) then Exit;
  WebBrowser1.Document.QueryInterface(IHTMLDocument2, doc);
  if doc = nil then Exit;
  all := doc.all;
  if (all = nil) or (all.length = 0) then Exit;
  // The original passed varEmpty as the second argument; that identifier is
  // the variant type code constant 0, so the call is item(0, 0).
  item := all.item(0, 0);
  // item.innerHTML is the source from <title> to </body>
  // item.outerHTML is the complete text
  Memo1.Lines.Add(item.innerHTML);
end;
这些方法都得不到实际的内容,只能得到如下结果:
<html>
<head>
<title>
Welcome 888crown</title>
<meta http-equiv="Content-Type" content="text/html;
charset=big5">
</head>
<frameset rows="*,0" frameborder="NO" border="0"
framespacing="0">
<frame name="SI2_mem_index" src="app/member/">
<frame name="SI2_func" scrolling="NO" noresize src="./ok.html">
<noscript><frame src=*></noscript>
</frameset>
<noframes>
<body bgcolor="#FFFFFF" text="#000000">
</body>
</noframes>
</html>
而不能同时得到框架中的图片和文字内容,还请指点